Code Example #1
def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
                               trace_hh_id=None, trace_label=None):
    """
    Calculate household utilities for each activity pattern alternative for households of hhsize.
    The resulting activity pattern for each household will be coded as a string of activity codes.
    e.g. 'MNHH' for a 4 person household with activities Mandatory, NonMandatory, Home, Home

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions
        indiv_utils has index of _persons_index_ and a column for each alternative
        i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)

    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes

    hhsize : int
        the size of household for which activity pattern should be calculated (1..MAX_HHSIZE)

    Returns
    -------
    choices : pandas.Series
        the chosen cdap activity pattern for each household represented as a string (e.g. 'MNH')
        with same index (_hh_index_) as utils

    """

    if hhsize == 1:
        # for 1 person households, there are no interactions to account for
        # and the household utils are the same as the individual utils
        choosers = vars = None
        # extract the individual utilities for individuals from hhsize 1 households
        utils = indiv_utils.loc[indiv_utils[_hh_size_] == 1, [_hh_id_, 'M', 'N', 'H']]
        # index on household_id, not person_id
        set_hh_index(utils)
    else:

        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients, hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label)

    if len(utils.index) == 0:
        return pd.Series(dtype='float64')

    probs = logit.utils_to_probs(utils, trace_label=trace_label)

    # select an activity pattern alternative for each household based on probability
    # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice expressed as index into alternative name from util column label
    choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)

    if trace_hh_id:

        if hhsize > 1:
            tracing.trace_df(choosers, '%s.hhsize%d_choosers' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])

        tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(rands, '%s.hhsize%d_rands' % (trace_label, hhsize),
                         columns=[None, 'rand'])

    return choices
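
The pattern shared by all of these examples is: exponentiate and normalize utilities to probabilities, draw one uniform random number per row, and map the chosen column position back to an alternative label. A minimal self-contained sketch of that pattern in plain numpy/pandas (the toy utilities, seed, and household ids are invented for illustration, and only approximate what activitysim's logit module does):

import numpy as np
import pandas as pd

# toy per-household utilities: one row per household, one column per pattern
utils = pd.DataFrame({'M': [1.2, 0.3], 'N': [0.4, 0.9], 'H': [0.0, 0.0]},
                     index=pd.Index([101, 102], name='household_id'))

# softmax: exponentiate and normalize each row
# (roughly what logit.utils_to_probs computes)
exp_utils = np.exp(utils)
probs = exp_utils.div(exp_utils.sum(axis=1), axis=0)

# one uniform draw per row; choose the first column whose cumulative
# probability exceeds the draw (the idea behind logit.make_choices)
rands = np.random.default_rng(42).random(len(probs))
idx_choices = (probs.cumsum(axis=1).values > rands[:, None]).argmax(axis=1)

# convert 0-based column positions back to alternative labels
choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)
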
Code Example #2
File: pathbuilder.py Project: figo2002/activitysim
    def build_virtual_path(self,
                           recipe,
                           path_type,
                           orig,
                           dest,
                           tod,
                           demographic_segment,
                           want_choices,
                           trace_label,
                           filter_targets=None,
                           trace=False,
                           override_choices=None):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'build_virtual_path')

        # Tracing is implemented as a separate, second call that operates ONLY on filter_targets
        assert not (trace and filter_targets is None)
        if filter_targets is not None:
            assert filter_targets.any()

            # slice orig and dest
            orig = orig[filter_targets]
            dest = dest[filter_targets]
            assert len(orig) > 0
            assert len(dest) > 0

            # slice tod and demographic_segment if not scalar
            if not isinstance(tod, str):
                tod = tod[filter_targets]
            if demographic_segment is not None:
                demographic_segment = demographic_segment[filter_targets]
                assert len(demographic_segment) > 0

            # slice choices
            # (requires actual choices from the previous call lest rands change on second call)
            assert want_choices == (override_choices is not None)
            if want_choices:
                override_choices = override_choices[filter_targets]

        units = self.units_for_recipe(recipe)
        assert units == 'utility' or not want_choices, "'want_choices' only supported if units is utility"

        access_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.access')
        egress_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.egress')
        path_types_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
        attributes_as_columns = \
            self.network_los.setting(f'TVPB_SETTINGS.{recipe}.tap_tap_settings.attributes_as_columns', [])

        path_info = {
            'path_type': path_type,
            'access_mode': access_mode,
            'egress_mode': egress_mode
        }

        # maz od pairs requested
        with memo("#TVPB build_virtual_path maz_od_df"):
            maz_od_df = pd.DataFrame({
                'idx': orig.index.values,
                'omaz': orig.values,
                'dmaz': dest.values,
                'seq': range(len(orig))
            })
            chunk.log_df(trace_label, "maz_od_df", maz_od_df)
            self.trace_maz_tap(maz_od_df, access_mode, egress_mode)

        # for location choice, there will be multiple alt dest rows per chooser and duplicate orig.index values
        # but tod and demographic_segment should be the same for all chooser rows (unique orig index values)
        # knowing this allows us to eliminate redundant computations (e.g. utilities of maz_tap pairs)
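        # e.g. with two sampled dest alternatives per chooser, orig.index might be
        # [8, 8, 9, 9]; keep='first' marks [False, True, False, True], so
        # chooser_attributes keeps a single row per chooser (index [8, 9])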
        duplicated = orig.index.duplicated(keep='first')
        chooser_attributes = pd.DataFrame(index=orig.index[~duplicated])
        if not isinstance(tod, str):
            chooser_attributes['tod'] = tod.loc[~duplicated]
        elif 'tod' in attributes_as_columns:
            chooser_attributes['tod'] = tod
        else:
            path_info['tod'] = tod
        if demographic_segment is not None:
            chooser_attributes['demographic_segment'] = \
                demographic_segment.loc[~duplicated]

        with memo("#TVPB build_virtual_path access_df"):
            access_df = self.compute_maz_tap_utilities(recipe,
                                                       maz_od_df,
                                                       chooser_attributes,
                                                       leg='access',
                                                       mode=access_mode,
                                                       trace_label=trace_label,
                                                       trace=trace)
        chunk.log_df(trace_label, "access_df", access_df)

        with memo("#TVPB build_virtual_path egress_df"):
            egress_df = self.compute_maz_tap_utilities(recipe,
                                                       maz_od_df,
                                                       chooser_attributes,
                                                       leg='egress',
                                                       mode=egress_mode,
                                                       trace_label=trace_label,
                                                       trace=trace)
        chunk.log_df(trace_label, "egress_df", egress_df)

        # path_info for use by expressions (e.g. penalty for drive access if no parking at access tap)
        with memo("#TVPB build_virtual_path compute_tap_tap"):
            transit_df = self.compute_tap_tap(recipe,
                                              maz_od_df,
                                              access_df,
                                              egress_df,
                                              chooser_attributes,
                                              path_info=path_info,
                                              trace_label=trace_label,
                                              trace=trace)
        chunk.log_df(trace_label, "transit_df", transit_df)

        with memo("#TVPB build_virtual_path best_paths"):
            path_df = self.best_paths(recipe, path_type, maz_od_df, access_df,
                                      egress_df, transit_df, trace_label,
                                      trace)
        chunk.log_df(trace_label, "path_df", path_df)

        # now that we have created path_df, we are done with the dataframes for the separate legs
        del access_df
        chunk.log_df(trace_label, "access_df", None)
        del egress_df
        chunk.log_df(trace_label, "egress_df", None)
        del transit_df
        chunk.log_df(trace_label, "transit_df", None)

        if units == 'utility':

            # logsums
            with memo("#TVPB build_virtual_path logsums"):
                # one row per seq with utilities in columns
                # path_num 0-based to align with logit.make_choices 0-based choice indexes
                path_df['path_num'] = path_df.groupby('seq').cumcount()
                chunk.log_df(trace_label, "path_df", path_df)

                utilities_df = path_df[['seq', 'path_num', units]] \
                    .set_index(['seq', 'path_num']).unstack()
                utilities_df.columns = utilities_df.columns.droplevel()  # for legibility

                # add rows missing because no access or egress availability
                utilities_df = pd.concat(
                    [pd.DataFrame(index=maz_od_df.seq), utilities_df], axis=1)
                utilities_df = utilities_df.fillna(UNAVAILABLE)  # set utilities for missing paths to UNAVAILABLE

                chunk.log_df(trace_label, "utilities_df", utilities_df)

                with warnings.catch_warnings(record=True) as w:
                    # Cause all warnings to always be triggered.
                    # most likely "divide by zero encountered in log" caused by all transit sets non-viable
                    warnings.simplefilter("always")

                    paths_nest_nesting_coefficient = path_types_settings.get(
                        'paths_nest_nesting_coefficient', 1)
                    exp_utilities = np.exp(utilities_df.values /
                                           paths_nest_nesting_coefficient)
                    logsums = np.maximum(
                        np.log(np.nansum(exp_utilities, axis=1)), UNAVAILABLE)
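                    # i.e. logsum = log(sum_p exp(U_p / theta)) over the path nest;
                    # if every path is UNAVAILABLE the sum underflows to zero and
                    # log(0) = -inf is clipped back up to UNAVAILABLE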

                    if len(w) > 0:
                        for wrn in w:
                            logger.warning(
                                f"{trace_label} - {type(wrn).__name__} ({wrn.message})"
                            )

                        DUMP = False
                        if DUMP:
                            zero_utilities_df = utilities_df[
                                np.nansum(np.exp(utilities_df.values), axis=1) == 0]
                            zero_utilities_df.to_csv(
                                config.output_file_path('warning_utilities_df.csv'),
                                index=True)
                            bug  # noqa: F821 -- deliberate NameError to halt after dumping

            if want_choices:

                # orig index to identify appropriate random number channel to use making choices
                utilities_df.index = orig.index

                with memo("#TVPB build_virtual_path make_choices"):

                    probs = logit.utils_to_probs(utilities_df,
                                                 allow_zero_probs=True,
                                                 trace_label=trace_label)
                    chunk.log_df(trace_label, "probs", probs)

                    if trace:
                        choices = override_choices

                        utilities_df['choices'] = choices
                        self.trace_df(utilities_df, trace_label,
                                      'utilities_df')

                        probs['choices'] = choices
                        self.trace_df(probs, trace_label, 'probs')
                    else:

                        choices, rands = logit.make_choices(
                            probs,
                            allow_bad_probs=True,
                            trace_label=trace_label)

                        chunk.log_df(trace_label, "rands", rands)
                        del rands
                        chunk.log_df(trace_label, "rands", None)

                    del probs
                    chunk.log_df(trace_label, "probs", None)

                # we need to get path_set, btap, atap from path_df row with same seq and path_num
                # drop the seq join column, but keep the chosen path_num so it can be passed as override_choices when tracing
                columns_to_cache = ['btap', 'atap', 'path_set', 'path_num']
                logsum_df = \
                    pd.merge(pd.DataFrame({'seq': range(len(orig)), 'path_num': choices.values}),
                             path_df[['seq'] + columns_to_cache],
                             on=['seq', 'path_num'], how='left')\
                    .drop(columns=['seq'])\
                    .set_index(orig.index)

                logsum_df['logsum'] = logsums

            else:

                assert len(logsums) == len(orig)
                logsum_df = pd.DataFrame({'logsum': logsums}, index=orig.index)

            chunk.log_df(trace_label, "logsum_df", logsum_df)

            del utilities_df
            chunk.log_df(trace_label, "utilities_df", None)

            if trace:
                self.trace_df(logsum_df, trace_label, 'logsum_df')

            chunk.log_df(trace_label, "logsum_df", logsum_df)
            results = logsum_df

        else:
            assert units == 'time'

            # return a series
            results = pd.Series(path_df[units].values, index=path_df['idx'])

            # zero-fill rows for O-D pairs where no best path exists because there was no tap-tap transit availability
            results = reindex(results, maz_od_df.idx).fillna(0.0)

            chunk.log_df(trace_label, "results", results)

        assert len(results) == len(orig)

        del path_df
        chunk.log_df(trace_label, "path_df", None)

        # diagnostic
        # maz_od_df['DIST'] = self.network_los.get_default_skim_dict().get('DIST').get(maz_od_df.omaz, maz_od_df.dmaz)
        # maz_od_df[units] = results.logsum if units == 'utility' else results.values
        # print(f"maz_od_df\n{maz_od_df}")

        return results
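
The reshape-and-logsum step in the middle of build_virtual_path can be exercised in isolation. Below is a minimal sketch with invented utilities; UNAVAILABLE and the nesting coefficient are stand-ins for values the real code reads from the TVPB settings:

import numpy as np
import pandas as pd

UNAVAILABLE = -999.0  # stand-in for the module-level constant
theta = 0.72          # hypothetical paths_nest_nesting_coefficient

# long-format path utilities: OD pair 0 has two candidate paths, pair 1 has one
path_df = pd.DataFrame({'seq': [0, 0, 1],
                        'path_num': [0, 1, 0],
                        'utility': [-1.0, -2.0, -0.5]})

# one row per OD pair, one column per path_num (the unstack in the code above)
utilities_df = path_df.set_index(['seq', 'path_num'])['utility'].unstack()
utilities_df = utilities_df.fillna(UNAVAILABLE)  # missing paths are unavailable

# collapse the path nest to a single logsum per OD pair
exp_utilities = np.exp(utilities_df.values / theta)
logsums = np.maximum(np.log(np.nansum(exp_utilities, axis=1)), UNAVAILABLE)
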
Code Example #3
def choose_tour_leg_pattern(trip_segment,
                            patterns, spec,
                            trace_label='trace_label'):
    alternatives = generate_alternatives(trip_segment, STOP_TIME_DURATION).sort_index()
    have_trace_targets = tracing.has_trace_targets(trip_segment)

    if have_trace_targets:
        tracing.trace_df(trip_segment, tracing.extend_trace_label(trace_label, 'choosers'))
        tracing.trace_df(alternatives, tracing.extend_trace_label(trace_label, 'alternatives'),
                         transpose=False)

    if len(spec.columns) > 1:
        raise RuntimeError('spec must have only one column')

    # - join choosers and alts
    # in vanilla interaction_simulate interaction_df is cross join of choosers and alternatives
    # interaction_df = logit.interaction_dataset(choosers, alternatives, sample_size)
    # here, alternatives is sparsely repeated once for each (non-dup) sample
    # we expect alternatives to have the same index as choosers (but with duplicate index values)
    # so we just need to left join alternatives with choosers
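    # e.g. a chooser index [t1, t2] with alternatives indexed [t1, t1, t2] yields
    # a three-row interaction_df in which each alternative row carries its
    # chooser's columns (colliding column names get the '_chooser' suffix)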
    assert alternatives.index.name == trip_segment.index.name

    interaction_df = alternatives.join(trip_segment, how='left', rsuffix='_chooser')

    chunk.log_df(trace_label, 'interaction_df', interaction_df)

    if have_trace_targets:
        trace_rows, trace_ids = tracing.interaction_trace_rows(interaction_df, trip_segment)

        tracing.trace_df(interaction_df,
                         tracing.extend_trace_label(trace_label, 'interaction_df'),
                         transpose=False)
    else:
        trace_rows = trace_ids = None

    interaction_utilities, trace_eval_results \
        = interaction_simulate.eval_interaction_utilities(spec, interaction_df, None, trace_label, trace_rows,
                                                          estimator=None)

    interaction_utilities = pd.concat([interaction_df[STOP_TIME_DURATION], interaction_utilities], axis=1)
    chunk.log_df(trace_label, 'interaction_utilities', interaction_utilities)

    interaction_utilities = pd.merge(interaction_utilities.reset_index(),
                                     patterns[patterns[TRIP_ID].isin(interaction_utilities.index)],
                                     on=[TRIP_ID, STOP_TIME_DURATION], how='left')

    if have_trace_targets:
        tracing.trace_interaction_eval_results(trace_eval_results, trace_ids,
                                               tracing.extend_trace_label(trace_label, 'eval'))

        tracing.trace_df(interaction_utilities,
                         tracing.extend_trace_label(trace_label, 'interaction_utilities'),
                         transpose=False)

    del interaction_df
    chunk.log_df(trace_label, 'interaction_df', None)

    interaction_utilities = interaction_utilities.groupby([TOUR_ID, OUTBOUND, PATTERN_ID],
                                                          as_index=False)[['utility']].sum()

    interaction_utilities[TOUR_LEG_ID] = \
        interaction_utilities.apply(generate_tour_leg_id, axis=1)

    tour_choosers = interaction_utilities.set_index(TOUR_LEG_ID)
    interaction_utilities = tour_choosers[['utility']].copy()

    # reshape utilities (one utility column and one row per row in model_design)
    # to a dataframe with one row per chooser and one column per alternative
    # interaction_utilities is sparse because duplicate sampled alternatives were dropped
    # so we need to pad with dummy utilities so low that they are never chosen

    # number of samples per chooser
    sample_counts = interaction_utilities.groupby(interaction_utilities.index).size().values
    chunk.log_df(trace_label, 'sample_counts', sample_counts)

    # max number of alternatives for any chooser
    max_sample_count = sample_counts.max()

    # offsets of the first and last rows of each chooser in sparse interaction_utilities
    last_row_offsets = sample_counts.cumsum()
    first_row_offsets = np.insert(last_row_offsets[:-1], 0, 0)

    # repeat the row offsets once for each dummy utility to insert
    # (we want to insert dummy utilities at the END of the list of alternative utilities)
    # inserts is a list of the indices at which we want to do the insertions
    inserts = np.repeat(last_row_offsets, max_sample_count - sample_counts)

    del sample_counts
    chunk.log_df(trace_label, 'sample_counts', None)

    # insert the zero-prob utilities to pad each alternative set to same size
    padded_utilities = np.insert(interaction_utilities.utility.values, inserts, -999)
    del inserts

    del interaction_utilities
    chunk.log_df(trace_label, 'interaction_utilities', None)

    # reshape to array with one row per chooser, one column per alternative
    padded_utilities = padded_utilities.reshape(-1, max_sample_count)
    chunk.log_df(trace_label, 'padded_utilities', padded_utilities)

    # convert to a dataframe with one row per chooser and one column per alternative
    utilities_df = pd.DataFrame(
        padded_utilities,
        index=tour_choosers.index.unique())
    chunk.log_df(trace_label, 'utilities_df', utilities_df)

    del padded_utilities
    chunk.log_df(trace_label, 'padded_utilities', None)

    if have_trace_targets:
        tracing.trace_df(utilities_df, tracing.extend_trace_label(trace_label, 'utilities'),
                         column_labels=['alternative', 'utility'])

    # convert to probabilities (utilities exponentiated and normalized to probs)
    # probs is same shape as utilities, one row per chooser and one column for alternative
    probs = logit.utils_to_probs(utilities_df,
                                 trace_label=trace_label, trace_choosers=trip_segment)

    chunk.log_df(trace_label, 'probs', probs)

    del utilities_df
    chunk.log_df(trace_label, 'utilities_df', None)

    if have_trace_targets:
        tracing.trace_df(probs, tracing.extend_trace_label(trace_label, 'probs'),
                         column_labels=['alternative', 'probability'])

    # make choices
    # positions is series with the chosen alternative represented as a column index in probs
    # which is an integer between zero and num alternatives in the alternative sample
    positions, rands = \
        logit.make_choices(probs, trace_label=trace_label, trace_choosers=trip_segment)

    chunk.log_df(trace_label, 'positions', positions)
    chunk.log_df(trace_label, 'rands', rands)

    del probs
    chunk.log_df(trace_label, 'probs', None)

    # shouldn't have chosen any of the dummy pad utilities
    assert positions.max() < max_sample_count

    # need to get from an integer offset into the alternative sample to the alternative index
    # that is, we want the index value of the row that is offset by <position> rows into the
    # tranche of this chooser's alternatives created by cross join of alternatives and choosers

    # the result is a pandas Series with one element per chooser row, in the same order as choosers
    choices = tour_choosers[PATTERN_ID].take(positions + first_row_offsets)

    chunk.log_df(trace_label, 'choices', choices)

    if have_trace_targets:
        tracing.trace_df(choices, tracing.extend_trace_label(trace_label, 'choices'),
                         columns=[None, PATTERN_ID])
        tracing.trace_df(rands, tracing.extend_trace_label(trace_label, 'rands'),
                         columns=[None, 'rand'])

    return choices
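
The -999 padding is the subtlest part of this function, so here it is in isolation: a minimal sketch with made-up utilities showing how np.insert turns a ragged stack of per-chooser alternatives into the rectangular utilities table that logit.utils_to_probs expects:

import numpy as np
import pandas as pd

# ragged per-chooser utilities: chooser 'a' sampled 3 alternatives, 'b' only 1
interaction_utilities = pd.Series(
    [0.1, 0.5, 0.2, 0.9],
    index=pd.Index(['a', 'a', 'a', 'b'], name='chooser_id'))

sample_counts = interaction_utilities.groupby(level=0).size().values  # [3, 1]
max_sample_count = sample_counts.max()                                # 3

# insertion points: the end of each chooser's block, repeated once per
# missing slot, so the dummies land AFTER the real utilities
last_row_offsets = sample_counts.cumsum()                                # [3, 4]
inserts = np.repeat(last_row_offsets, max_sample_count - sample_counts)  # [4, 4]

# pad with utilities so low they can never be chosen, then reshape to one
# row per chooser and one column per alternative slot
padded_utilities = np.insert(interaction_utilities.values, inserts, -999)
utilities_df = pd.DataFrame(padded_utilities.reshape(-1, max_sample_count),
                            index=interaction_utilities.index.unique())
#               0      1      2
# chooser_id
# a           0.1    0.5    0.2
# b           0.9 -999.0 -999.0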