예제 #1
0
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     network_los, skims, trace_label):
    """
    Compute logsums for each alt_tdd row joined to its tour.

    Parameters
    ----------
    alt_tdd : pandas.DataFrame
        alternatives; left-joined to tours_merged on its index to form choosers
    tours_merged : pandas.DataFrame
        tour attributes (overlapping column names get a '_chooser' suffix)
    tour_purpose : str
        segment used to look up the coefficients
        (presumably a tour purpose name - confirm against caller)
    model_settings : dict
        must contain 'LOGSUM_SETTINGS'; may contain 'LOGSUM_PREPROCESSOR'
    network_los
        object exposing zone_system and setting()
    skims : dict
        skim wrappers; made available to expressions and passed to simulate
    trace_label : str

    Returns
    -------
    logsums
        the result of simulate.simple_simulate_logsums for the joined choosers
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    with chunk.chunk_log(trace_label):
        logsum_settings = config.read_model_settings(
            model_settings['LOGSUM_SETTINGS'])

        # one chooser row per alternative, annotated with the tour columns
        choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
        logger.info(
            f"{trace_label} compute_logsums for {choosers.shape[0]} choosers {alt_tdd.shape[0]} alts"
        )

        # - names visible to preprocessor and spec expressions
        # (later updates override earlier ones on key collisions)
        expression_locals = {}
        expression_locals.update(config.get_model_constants(logsum_settings))

        if network_los.zone_system == los.THREE_ZONE:
            # TVPB constants can appear in expressions
            tvpb_constants = network_los.setting(
                'TVPB_SETTINGS.tour_mode_choice.CONSTANTS')
            expression_locals.update(tvpb_constants)

        expression_locals.update(skims)

        # constrained coefficients can appear in expressions
        segment_coefficients = simulate.get_segment_coefficients(
            logsum_settings, tour_purpose)
        expression_locals.update(segment_coefficients)

        # - run preprocessor to annotate choosers
        # an alternate preprocessor key may be named in model_settings
        preprocessor_key = model_settings.get('LOGSUM_PREPROCESSOR',
                                              'preprocessor')
        preprocessor_settings = logsum_settings[preprocessor_key]

        if preprocessor_settings:
            simulate.set_skim_wrapper_targets(choosers, skims)
            expressions.assign_columns(df=choosers,
                                       model_settings=preprocessor_settings,
                                       locals_dict=expression_locals,
                                       trace_label=trace_label)

        # - compute logsums
        raw_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
        logsum_spec = simulate.eval_coefficients(raw_spec,
                                                 segment_coefficients,
                                                 estimator=None)

        raw_nest_spec = config.get_logit_model_settings(logsum_settings)
        nest_spec = simulate.eval_nest_coefficients(raw_nest_spec,
                                                    segment_coefficients,
                                                    trace_label)

        logsums = simulate.simple_simulate_logsums(choosers,
                                                   logsum_spec,
                                                   nest_spec,
                                                   skims=skims,
                                                   locals_d=expression_locals,
                                                   chunk_size=0,
                                                   trace_label=trace_label)

    return logsums
예제 #2
0
    def build_virtual_path(self,
                           recipe,
                           path_type,
                           orig,
                           dest,
                           tod,
                           demographic_segment,
                           want_choices,
                           trace_label,
                           filter_targets=None,
                           trace=False,
                           override_choices=None):
        """
        Build transit virtual paths between orig and dest and summarize them.

        Computes access and egress maz-tap legs, the tap-tap legs, and the best
        paths for every orig/dest row, then returns either logsums (when the
        recipe units are 'utility') or times (when the units are 'time').

        Parameters
        ----------
        recipe : str
            key under TVPB_SETTINGS used to look up path_types settings
        path_type : str
            key under TVPB_SETTINGS.<recipe>.path_types
        orig, dest
            origin and destination zones; both .index and .values are used,
            so presumably pandas Series with the same index - confirm
        tod : str or indexable
            a scalar str applied to all rows, otherwise sliced by
            filter_targets and selected with .loc
        demographic_segment : indexable or None
        want_choices : bool
            only permitted when the recipe units are 'utility'
        trace_label : str
        filter_targets : boolean mask or None
            when given, orig, dest, tod, demographic_segment and
            override_choices are sliced by it
        trace : bool
            requires filter_targets
        override_choices : indexable or None
            when filter_targets is given, must be supplied if and only if
            want_choices; used as the choices when trace is True

        Returns
        -------
        pandas.DataFrame or pandas.Series
            units == 'utility': DataFrame indexed by orig.index with a
            'logsum' column, plus 'btap', 'atap', 'path_set' and 'path_num'
            of the chosen path when want_choices.
            units == 'time': Series of path times aligned to the orig/dest
            rows by the reindex helper, with missing paths filled with 0.0.
        """

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'build_virtual_path')

        # Tracing is implemented as a separate, second call that operates ONLY on filter_targets
        assert not (trace and filter_targets is None)
        if filter_targets is not None:
            assert filter_targets.any()

            # slice orig and dest
            orig = orig[filter_targets]
            dest = dest[filter_targets]
            assert len(orig) > 0
            assert len(dest) > 0

            # slice tod and demographic_segment if not scalar
            if not isinstance(tod, str):
                tod = tod[filter_targets]
            if demographic_segment is not None:
                demographic_segment = demographic_segment[filter_targets]
                assert len(demographic_segment) > 0

            # slice choices
            # (requires actual choices from the previous call lest rands change on second call)
            assert want_choices == (override_choices is not None)
            if want_choices:
                override_choices = override_choices[filter_targets]

        units = self.units_for_recipe(recipe)
        assert units == 'utility' or not want_choices, "'want_choices' only supported supported if units is utility"

        access_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.access')
        egress_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.egress')
        path_types_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
        # a nesting coefficient of 1 leaves the utilities unscaled in the logsum below
        paths_nest_nesting_coefficient = path_types_settings.get(
            'paths_nest_nesting_coefficient', 1)

        # maz od pairs requested
        with memo("#TVPB build_virtual_path maz_od_df"):
            # 'seq' is the zero-based row position, used later as the join key for paths
            maz_od_df = pd.DataFrame({
                'idx': orig.index.values,
                'omaz': orig.values,
                'dmaz': dest.values,
                'seq': range(len(orig))
            })
            chunk.log_df(trace_label, "maz_od_df", maz_od_df)
            self.trace_maz_tap(maz_od_df, access_mode, egress_mode)

        # for location choice, there will be multiple alt dest rows per chooser and duplicate orig.index values
        # but tod and demographic_segment should be the same for all chooser rows (unique orig index values)
        # knowing this allows us to eliminate redundant computations (e.g. utilities of maz_tap pairs)
        duplicated = orig.index.duplicated(keep='first')
        chooser_attributes = pd.DataFrame(index=orig.index[~duplicated])
        chooser_attributes['tod'] = tod if isinstance(
            tod, str) else tod.loc[~duplicated]
        if demographic_segment is not None:
            chooser_attributes[
                'demographic_segment'] = demographic_segment.loc[~duplicated]

        with memo("#TVPB build_virtual_path access_df"):
            with chunk.chunk_log(f'#TVPB.access.{access_mode}'):
                access_df = self.compute_maz_tap_utilities(
                    recipe,
                    maz_od_df,
                    chooser_attributes,
                    leg='access',
                    mode=access_mode,
                    trace_label=trace_label,
                    trace=trace)
        chunk.log_df(trace_label, "access_df", access_df)

        with memo("#TVPB build_virtual_path egress_df"):
            with chunk.chunk_log(
                    f'#TVPB.compute_maz_tap_utilities.egress.{egress_mode}'):
                egress_df = self.compute_maz_tap_utilities(
                    recipe,
                    maz_od_df,
                    chooser_attributes,
                    leg='egress',
                    mode=egress_mode,
                    trace_label=trace_label,
                    trace=trace)
        chunk.log_df(trace_label, "egress_df", egress_df)

        # path_info for use by expressions (e.g. penalty for drive access if no parking at access tap)
        with memo("#TVPB build_virtual_path compute_tap_tap"):
            with chunk.chunk_log(f'#TVPB.compute_tap_tap'):
                path_info = {
                    'path_type': path_type,
                    'access_mode': access_mode,
                    'egress_mode': egress_mode
                }
                transit_df = self.compute_tap_tap(recipe,
                                                  maz_od_df,
                                                  access_df,
                                                  egress_df,
                                                  chooser_attributes,
                                                  path_info=path_info,
                                                  trace_label=trace_label,
                                                  trace=trace)
        chunk.log_df(trace_label, "transit_df", transit_df)

        with memo("#TVPB build_virtual_path best_paths"):
            with chunk.chunk_log(f'#TVPB.best_paths'):
                path_df = self.best_paths(recipe, path_type, maz_od_df,
                                          access_df, egress_df, transit_df,
                                          trace_label, trace)
        chunk.log_df(trace_label, "path_df", path_df)

        # now that we have created path_df, we are done with the dataframes for the separate legs
        del access_df
        chunk.log_df(trace_label, "access_df", None)
        del egress_df
        chunk.log_df(trace_label, "egress_df", None)
        del transit_df
        chunk.log_df(trace_label, "transit_df", None)

        if units == 'utility':

            # logsums
            with memo("#TVPB build_virtual_path logsums"):
                # one row per seq with utilities in columns
                # path_num 0-based to align with logit.make_choices 0-based choice indexes
                path_df['path_num'] = path_df.groupby('seq').cumcount()
                chunk.log_df(trace_label, "path_df", path_df)

                utilities_df = path_df[['seq', 'path_num',
                                        units]].set_index(['seq', 'path_num'
                                                           ]).unstack()
                utilities_df.columns = utilities_df.columns.droplevel(
                )  # for legibility

                # add rows missing because no access or egress availability
                utilities_df = pd.concat(
                    [pd.DataFrame(index=maz_od_df.seq), utilities_df], axis=1)
                utilities_df = utilities_df.fillna(
                    UNAVAILABLE
                )  # set utilities for missing paths to UNAVAILABLE

                chunk.log_df(trace_label, "utilities_df", utilities_df)

                # log-sum-exp across the path columns of each row, floored at UNAVAILABLE
                logsums = np.maximum(
                    np.log(
                        np.nansum(np.exp(utilities_df.values /
                                         paths_nest_nesting_coefficient),
                                  axis=1)), UNAVAILABLE)

            if want_choices:

                # orig index to identify appropriate random number channel to use making choices
                utilities_df.index = orig.index

                with memo("#TVPB build_virtual_path make_choices"):

                    probs = logit.utils_to_probs(utilities_df,
                                                 allow_zero_probs=True,
                                                 trace_label=trace_label)
                    chunk.log_df(trace_label, "probs", probs)

                    if trace:
                        # reuse the choices made by the earlier (untraced) call rather than redrawing
                        choices = override_choices

                        utilities_df['choices'] = choices
                        self.trace_df(utilities_df, trace_label,
                                      'utilities_df')

                        probs['choices'] = choices
                        self.trace_df(probs, trace_label, 'probs')
                    else:

                        choices, rands = logit.make_choices(
                            probs,
                            allow_bad_probs=True,
                            trace_label=trace_label)

                        chunk.log_df(trace_label, "rands", rands)
                        del rands
                        chunk.log_df(trace_label, "rands", None)

                    del probs
                    chunk.log_df(trace_label, "probs", None)

                # we need to get path_set, btap, atap from path_df row with same seq and path_num
                # drop seq join column, but keep path_num of choice to override_choices when tracing
                columns_to_cache = ['btap', 'atap', 'path_set', 'path_num']
                logsum_df = \
                    pd.merge(pd.DataFrame({'seq': range(len(orig)), 'path_num': choices.values}),
                             path_df[['seq'] + columns_to_cache],
                             on=['seq', 'path_num'], how='left')\
                    .drop(columns=['seq'])\
                    .set_index(orig.index)

                logsum_df['logsum'] = logsums

            else:

                assert len(logsums) == len(orig)
                logsum_df = pd.DataFrame({'logsum': logsums}, index=orig.index)

            chunk.log_df(trace_label, "logsum_df", logsum_df)

            del utilities_df
            chunk.log_df(trace_label, "utilities_df", None)

            if trace:
                self.trace_df(logsum_df, trace_label, 'logsum_df')

            # NOTE(review): logsum_df was already logged above; this second call looks redundant
            chunk.log_df(trace_label, "logsum_df", logsum_df)
            results = logsum_df

        else:
            assert units == 'time'

            # return a series
            results = pd.Series(path_df[units].values, index=path_df['idx'])

            # zero-fill rows for O-D pairs where no best path exists because there was no tap-tap transit availability
            results = reindex(results, maz_od_df.idx).fillna(0.0)

            chunk.log_df(trace_label, "results", results)

        assert len(results) == len(orig)

        del path_df
        chunk.log_df(trace_label, "path_df", None)

        # diagnostic
        # maz_od_df['DIST'] = self.network_los.get_default_skim_dict().get('DIST').get(maz_od_df.omaz, maz_od_df.dmaz)
        # maz_od_df[units] = results.logsum if units == 'utility' else results.values
        # print(f"maz_od_df\n{maz_od_df}")

        return results
예제 #3
0
def schedule_trips_in_leg(outbound, trips, probs_spec, model_settings,
                          last_iteration, trace_hh_id, trace_label):
    """
    Choose depart times for the trips of one leg direction.

    The initial trip of the leg and all atwork trips simply take tour_hour.
    The remaining trips are scheduled one trip_num at a time, each choice
    narrowing the depart window of the next trip to be handled.

    Parameters
    ----------
    outbound : bool
        direction of every trip in trips (asserted)
    trips : pandas.DataFrame
        reads trip_num, trip_count, primary_purpose, tour_hour and outbound,
        and the 'earliest' / 'latest' depart window columns
    probs_spec
        passed through to schedule_nth_trips
    model_settings : dict
        read for the FAILFIX setting; also passed to schedule_nth_trips
    last_iteration : bool
        enables failed-trip reporting and the most-initial failfix
    trace_hh_id
    trace_label : str

    Returns
    -------
    choices: pd.Series
        depart choice for trips, indexed like trips (presumably trip_id)
    """

    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    assert len(trips) > 0
    assert (trips.outbound == outbound).all()

    # initial trip of leg and all atwork trips get tour_hour
    if outbound:
        is_initial = (trips.trip_num == 1)
    else:
        is_initial = (trips.trip_num == trips.trip_count)
    no_scheduling = is_initial | (trips.primary_purpose == 'atwork')
    choices = trips.tour_hour[no_scheduling]

    if no_scheduling.all():
        return choices

    result_list = [choices]
    trips = trips[~no_scheduling]

    # add next_trip_id temp column (temp as trips is now a copy, as result of slicing)
    trips = trips.sort_index()
    shift = -1 if outbound else 1
    trips['next_trip_id'] = np.roll(trips.index, shift)
    if outbound:
        is_final = (trips.trip_num == trips.trip_count)
    else:
        is_final = (trips.trip_num == 1)
    trips.next_trip_id = trips.next_trip_id.where(~is_final, NO_TRIP_ID)

    # if outbound, this trip's depart constrains next trip's earliest depart option
    # if inbound, we are handling in reverse order, so it constrains latest depart instead
    adjust_next_depart_col = 'earliest' if outbound else 'latest'

    # iterate over outbound trips in ascending trip_num order, skipping the initial trip
    # iterate over inbound trips in descending trip_num order, skipping the final trip
    lowest_trip_num = trips.trip_num.min()
    highest_trip_num = trips.trip_num.max()
    for position, i in enumerate(range(lowest_trip_num, highest_trip_num + 1)):

        if outbound:
            nth_mask = (trips.trip_num == i)
        else:
            nth_mask = (trips.trip_num == trips.trip_count - i)
        nth_trips = trips[nth_mask]

        nth_trace_label = tracing.extend_trace_label(trace_label, 'num_%s' % i)

        with chunk.chunk_log(nth_trace_label):
            choices = schedule_nth_trips(nth_trips,
                                         probs_spec,
                                         model_settings,
                                         first_trip_in_leg=(position == 0),
                                         report_failed_trips=last_iteration,
                                         trace_hh_id=trace_hh_id,
                                         trace_label=nth_trace_label)

        # most initial departure (when no choice was made because all probs were zero)
        if last_iteration and (failfix == FAILFIX_CHOOSE_MOST_INITIAL):
            choices = choices.reindex(nth_trips.index)
            logger.warning("%s coercing %s depart choices to most initial" %
                           (nth_trace_label, choices.isna().sum()))
            choices = choices.fillna(trips[adjust_next_depart_col])

        # adjust allowed depart range of next trip
        has_next_trip = (nth_trips.next_trip_id != NO_TRIP_ID)
        if has_next_trip.any():
            next_trip_ids = nth_trips.next_trip_id[has_next_trip]
            # trips that weren't scheduled pass on their own current bound instead of a choice
            patched_choices = choices.reindex(next_trip_ids.index).fillna(
                trips[adjust_next_depart_col])
            trips.loc[next_trip_ids, adjust_next_depart_col] = \
                patched_choices.values

        result_list.append(choices)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
예제 #4
0
    def best_paths(self,
                   recipe,
                   path_type,
                   maz_od_df,
                   access_df,
                   egress_df,
                   transit_df,
                   trace_label,
                   trace=False):
        """
        Select the best access + transit + egress paths for each maz_od_df row.

        Joins the three legs, adds access and egress to every transit set
        column, keeps up to max_paths_per_tap_set rows per seq for each
        transit set, and then keeps up to max_paths_across_tap_sets rows per
        seq overall.

        Parameters
        ----------
        recipe : str
            key under TVPB_SETTINGS
        path_type : str
            key under TVPB_SETTINGS.<recipe>.path_types
        maz_od_df : pandas.DataFrame
            one row per chooser with 'idx', 'omaz', 'dmaz' columns
            NOTE: a 'seq' column is (over)written in place from its index
        access_df : pandas.DataFrame
            joined on ['idx', 'omaz']; must supply 'btap' and 'access'
        egress_df : pandas.DataFrame
            joined on ['idx', 'dmaz']; must supply 'atap' and 'egress'
        transit_df : pandas.DataFrame
            joined on ['idx', 'atap', 'btap']; every other column is treated
            as a transit set
        trace_label : str
        trace : bool

        Returns
        -------
        pandas.DataFrame
            sorted by seq then best-first by the units column, with a
            'path_set' column naming the transit set each row came from.
            Index labels are those of the joined frame and may repeat.
        """

        trace_label = tracing.extend_trace_label(trace_label, 'best_paths')

        with chunk.chunk_log(trace_label):

            path_settings = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
            max_paths_per_tap_set = path_settings.get('max_paths_per_tap_set',
                                                      1)
            max_paths_across_tap_sets = path_settings.get(
                'max_paths_across_tap_sets', 1)

            units = self.units_for_recipe(recipe)
            # times are minimized; anything else (utility) is maximized
            smaller_is_better = (units in ['time'])

            maz_od_df['seq'] = maz_od_df.index
            # maz_od_df has one row per chooser
            # inner join to add rows for each access, egress, and transit segment combination
            path_df = maz_od_df. \
                merge(access_df, on=['idx', 'omaz'], how='inner'). \
                merge(egress_df, on=['idx', 'dmaz'], how='inner'). \
                merge(transit_df, on=['idx', 'atap', 'btap'], how='inner')

            chunk.log_df(trace_label, "path_df", path_df)

            # transit sets are the transit_df non-join columns
            transit_sets = [
                c for c in transit_df.columns
                if c not in ['idx', 'atap', 'btap']
            ]

            if trace:
                # be nice and show both tap_tap set utility and total_set = access + set + egress
                for c in transit_sets:
                    path_df[f'total_{c}'] = path_df[c] + path_df[
                        'access'] + path_df['egress']
                self.trace_df(path_df, trace_label, 'best_paths.full')
                for c in transit_sets:
                    del path_df[f'total_{c}']

            for c in transit_sets:
                path_df[c] = path_df[c] + path_df['access'] + path_df['egress']
            path_df.drop(columns=['access', 'egress'], inplace=True)

            # choose best paths by tap set
            best_paths_list = []
            for c in transit_sets:
                # path_df index labels are unique here, so isin selects exactly the head rows
                keep = path_df.index.isin(path_df[['seq', c]].sort_values(
                    by=c, ascending=smaller_is_better).groupby(
                        ['seq']).head(max_paths_per_tap_set).index)

                # drop() returns a new frame, so the assignments below do not
                # write into a slice of path_df
                best_paths_for_set = path_df.loc[keep].drop(
                    columns=transit_sets)
                best_paths_for_set['path_set'] = c  # remember the path set
                best_paths_for_set[units] = path_df.loc[keep, c]
                best_paths_list.append(best_paths_for_set)

            path_df = pd.concat(best_paths_list).sort_values(
                by=['seq', units], ascending=[True, smaller_is_better])

            # choose best paths overall by seq
            # After concat the same index label can appear once per transit set,
            # so filtering with index.isin(...) could keep more rows than allowed.
            # groupby.head selects by position and preserves the sorted order.
            path_df = path_df.groupby('seq').head(max_paths_across_tap_sets)

            if trace:
                self.trace_df(path_df, trace_label, 'best_paths')

        return path_df
예제 #5
0
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, path_info, trace_label,
                             trace):
        """
        Compute the transit value for every btap-atap pair of each chooser.

        Builds all transit paths from access_df and egress_df, evaluates the
        tap_tap_settings assignment spec once per unique (btap, atap) pair,
        copies the result back to every path row as a 'transit' column, and
        drops rows whose value is not greater than zero.

        Parameters
        ----------
        recipe : str
            key under TVPB_SETTINGS supplying CONSTANTS and tap_tap_settings
        access_df : pandas.DataFrame
        egress_df : pandas.DataFrame
        chooser_attributes : pandas.DataFrame
            its columns are carried through the spec evaluation and removed
            from the result
        path_info : dict
            copied into the locals available to the spec expressions
        trace_label : str
        trace : bool

        Returns
        -------
        pandas.DataFrame
            transit paths with a positive 'transit' column and without the
            chooser attribute columns (index is arbitrary)
        """

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        with chunk.chunk_log(trace_label):

            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')
            tap_tap_settings = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

            with memo(
                    "#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df, egress_df,
                                                    chooser_attributes,
                                                    trace_label, trace)
                # note: transit_df index is arbitrary
                chunk.log_df(trace_label, "transit_df", transit_df)

            # some expressions may want to know access mode -
            locals_dict = path_info.copy()
            locals_dict['los'] = self.network_los
            locals_dict.update(model_constants)

            assignment_spec = assign.read_assignment_spec(
                file_name=config.config_file_path(tap_tap_settings['SPEC']))

            DEDUPE = True
            if DEDUPE:

                # assign uid for reduping
                # (unique per btap/atap pair, assuming non-negative tap ids)
                max_atap = transit_df.atap.max() + 1
                transit_df[
                    'uid'] = transit_df.btap * max_atap + transit_df.atap

                # dedupe
                # NOTE(review): only the first row per (btap, atap) is evaluated,
                # so this presumes the chooser attributes do not vary within a pair
                chooser_attribute_columns = list(chooser_attributes.columns)
                unique_transit_df = transit_df.loc[
                    ~transit_df.uid.duplicated(),
                    ['btap', 'atap', 'uid'] + chooser_attribute_columns
                ].set_index('uid')
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)

                logger.debug(
                    f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}"
                )

                # assign_variables
                results, _, _ = assign.assign_variables(
                    assignment_spec, unique_transit_df, locals_dict)
                # the spec must produce exactly one result column
                assert len(results.columns) == 1
                unique_transit_df['transit'] = results

                # redupe results back into transit_df
                with memo("#TVPB compute_tap_tap_time redupe transit_df"):
                    transit_df['transit'] = reindex(unique_transit_df.transit,
                                                    transit_df.uid)

                del transit_df['uid']
                del unique_transit_df
                chunk.log_df(trace_label, "transit_df", transit_df)
                chunk.log_df(trace_label, "unique_transit_df", None)

            else:
                results, _, _ = assign.assign_variables(
                    assignment_spec, transit_df, locals_dict)
                # the spec must produce exactly one result column
                assert len(results.columns) == 1
                transit_df['transit'] = results

            # filter out unavailable btap_atap pairs
            logger.debug(
                f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
            )
            # non-inplace drop so we never modify a slice of the unfiltered frame
            transit_df = transit_df[transit_df.transit > 0].drop(
                columns=chooser_attributes.columns)

            chunk.log_df(trace_label, "transit_df", None)

            if trace:
                self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
예제 #6
0
    def lookup_tap_tap_utilities(self, recipe, maz_od_df, access_df, egress_df,
                                 chooser_attributes, path_info, trace_label):
        """
        create transit_df and look up utilities for all atap-btap pairs between omaz in access and dmaz in egress_df
        the utilities come from the precomputed tap_cache data (which is indexed by uid_calculator unique_ids)
        (unique_id is used as a zero-based row index into the data array)

        transit_df contains all possible access omaz/btap to egress dmaz/atap transit path pairs for each chooser

        Parameters
        ----------
        recipe
            not used by this method
            (attribute_segments are always read from the tour_mode_choice settings)
        maz_od_df : pandas.DataFrame
            only used for the TRACE_COMPLEXITY diagnostic
        access_df : pandas.DataFrame
        egress_df : pandas.DataFrame
        chooser_attributes : pandas.DataFrame
        path_info : dict
            supplies the scalar attribute values for attribute_segments keys
            that are not columns of transit_df
        trace_label : str

        Returns
        -------
        transit_df : pandas.DataFrame
            'idx', 'btap', 'atap' plus one utility column per uid_calculator set name,
            indexed by unique_id
        """

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'lookup_tap_tap_utils')

        with chunk.chunk_log(trace_label):

            with memo(
                    "#TVPB CACHE lookup_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df,
                                                    egress_df,
                                                    chooser_attributes,
                                                    trace_label,
                                                    trace=False)
                # note: transit_df index is arbitrary
                chunk.log_df(trace_label, "transit_df", transit_df)

            if TRACE_COMPLEXITY:
                # diagnostic: log the omaz,dmaz pairs with the greatest number of virtual tap-tap paths
                num_paths = transit_df.groupby(['idx']).size().to_frame('n')
                num_paths = pd.merge(maz_od_df,
                                     num_paths,
                                     left_on='idx',
                                     right_index=True)
                num_paths = num_paths[[
                    'omaz', 'dmaz', 'n'
                ]].drop_duplicates(subset=['omaz', 'dmaz'])
                num_paths = num_paths.sort_values(
                    'n', ascending=False).reset_index(drop=True)
                logger.debug(f"num_paths\n{num_paths.head(10)}")

            # FIXME some expressions may want to know access mode -
            locals_dict = path_info.copy()

            # add uid column to transit_df
            with memo("#TVPB lookup_tap_tap_utilities assign uid"):
                attribute_segments = \
                    self.network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attribute_segments')
                # segment attributes that are not transit_df columns must be scalars in path_info
                scalar_attributes = {
                    k: locals_dict[k]
                    for k in attribute_segments.keys() if k not in transit_df
                }

                transit_df.index = self.uid_calculator.get_unique_ids(
                    transit_df, scalar_attributes)
                transit_df = transit_df[[
                    'idx', 'btap', 'atap'
                ]]  # just needed chooser_columns for uid calculation
                chunk.log_df(trace_label, "transit_df add uid index",
                             transit_df)

            with memo("#TVPB lookup_tap_tap_utilities reindex transit_df"):
                utilities = self.tap_cache.data
                uids = transit_df.index.values
                # column i of the cache data holds the utilities for the i-th set name
                for i, column_name in enumerate(self.uid_calculator.set_names):
                    transit_df[column_name] = utilities[uids, i]

            # ERR_CHECK gates the check: when disabled it is skipped,
            # rather than the assert failing unconditionally
            if ERR_CHECK:
                for c in self.uid_calculator.set_names:
                    assert not transit_df[c].isnull().any()

            chunk.log_df(trace_label, "transit_df", None)

        return transit_df
예제 #7
0
    def compute_tap_tap_utilities(self, recipe, access_df, egress_df,
                                  chooser_attributes, path_info, trace_label,
                                  trace):
        """
        create transit_df and compute utilities for all atap-btap pairs between omaz in access and dmaz in egress_df
        compute the utilities using the tap_tap utility expressions file specified in tap_tap_settings

        transit_df contains all possible access omaz/btap to egress dmaz/atap transit path pairs for each chooser

        trace should be True as we don't encourage/support dynamic utility computation except when tracing
        (precompute being fairly fast)

        Parameters
        ----------
        recipe: str
           'recipe' key in network_los.yaml TVPB_SETTINGS e.g. tour_mode_choice
        access_df: pandas.DataFrame
            dataframe with 'idx' and 'omaz' columns
        egress_df: pandas.DataFrame
            dataframe with 'idx' and 'dmaz' columns
        chooser_attributes: dict
        path_info
        trace_label: str
        trace: boolean

        Returns
        -------
        transit_df: pandas.dataframe
        """

        assert trace

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_utils')

        with chunk.chunk_log(trace_label):

            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')
            tap_tap_settings = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

            with memo(
                    "#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df, egress_df,
                                                    chooser_attributes,
                                                    trace_label, trace)
                # note: transit_df index is arbitrary
            chunk.log_df(trace_label, "transit_df", transit_df)

            # FIXME some expressions may want to know access mode -
            locals_dict = path_info.copy()
            locals_dict.update(model_constants)

            # columns needed for compute_utilities
            chooser_columns = ['btap', 'atap'] + list(
                chooser_attributes.columns)

            # deduplicate transit_df to unique_transit_df
            with memo(
                    "#TVPB compute_tap_tap_utilities deduplicate transit_df"):

                attribute_segments = \
                    self.network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attribute_segments')
                scalar_attributes = {
                    k: locals_dict[k]
                    for k in attribute_segments.keys() if k not in transit_df
                }

                transit_df['uid'] = self.uid_calculator.get_unique_ids(
                    transit_df, scalar_attributes)

                unique_transit_df = transit_df.loc[
                    ~transit_df.uid.duplicated(), chooser_columns + ['uid']]
                logger.debug(
                    f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}"
                )

                unique_transit_df.set_index('uid', inplace=True)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)

                transit_df = transit_df[['idx', 'btap', 'atap',
                                         'uid']]  # don't need chooser columns
                chunk.log_df(trace_label, "transit_df", transit_df)

            logger.debug(
                f"#TVPB CACHE compute_tap_tap_utilities dedupe transit_df "
                f"from {len(transit_df)} to {len(unique_transit_df)} rows")

            num_unique_transit_rows = len(unique_transit_df)  # errcheck
            logger.debug(
                f"#TVPB CACHE compute_tap_tap_utilities compute_utilities for {len(unique_transit_df)} rows"
            )

            with memo("#TVPB compute_tap_tap_utilities compute_utilities"):
                unique_utilities_df = compute_utilities(
                    self.network_los,
                    tap_tap_settings,
                    choosers=unique_transit_df,
                    model_constants=locals_dict,
                    trace_label=trace_label,
                    trace=trace,
                    trace_column_names=chooser_columns if trace else None)
                chunk.log_df(trace_label, "unique_utilities_df",
                             unique_utilities_df)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)  # annotated

                if trace:
                    # combine unique_transit_df with unique_utilities_df for legibility
                    omnibus_df = pd.merge(unique_transit_df,
                                          unique_utilities_df,
                                          left_index=True,
                                          right_index=True,
                                          how='left')
                    self.trace_df(omnibus_df, trace_label,
                                  'unique_utilities_df')
                    chunk.log_df(trace_label, "omnibus_df", omnibus_df)
                    del omnibus_df
                    chunk.log_df(trace_label, "omnibus_df", None)

            assert num_unique_transit_rows == len(
                unique_utilities_df)  # errcheck

            # redupe unique_transit_df back into transit_df
            with memo("#TVPB compute_tap_tap_utilities redupe transit_df"):

                # idx = transit_df.index
                transit_df = pd.merge(transit_df,
                                      unique_utilities_df,
                                      left_on='uid',
                                      right_index=True)
                del transit_df['uid']
                # transit_df.index = idx
                # note: left merge on columns does not preserve index,
                # but transit_df index is arbitrary so no need to restore

                chunk.log_df(trace_label, "transit_df", transit_df)

            for c in unique_utilities_df:
                assert ERR_CHECK and not transit_df[c].isnull().any()

            if len(unique_transit_df) > 0:
                # if all rows were cached, then unique_utilities_df is just a ref to cache
                del unique_utilities_df
                chunk.log_df(trace_label, "unique_utilities_df", None)

            chunk.log_df(trace_label, "transit_df", None)

            if trace:
                self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
예제 #8
0
    def compute_maz_tap_utilities(self, recipe, maz_od_df, chooser_attributes,
                                  leg, mode, trace_label, trace):
        """
        Compute maz-to-tap utilities for one leg of a transit path.

        Parameters
        ----------
        recipe : str
            recipe name used to look up ``TVPB_SETTINGS.{recipe}`` settings
            (maz_tap_settings for ``mode`` and CONSTANTS)
        maz_od_df : pandas.DataFrame
            must contain an 'idx' column and the maz column for this leg
            ('omaz' when leg is 'access', 'dmaz' otherwise)
        chooser_attributes : pandas.DataFrame or None
            optional supplemental attributes; each column is reindexed onto
            the utilities table by 'idx'
            (presumably indexed by the same idx values - confirm with caller)
        leg : str
            'access' selects the omaz/btap columns, any other value selects
            dmaz/atap; also used as the name of the result column
        mode : str
            key into network_los.maz_to_tap_dfs and the maz_tap_settings
        trace_label : str
        trace : bool
            tracing is switched off when there are no maz/tap rows

        Returns
        -------
        pandas.DataFrame
            one row per (idx, maz, tap) combination with the computed value in
            a column named ``leg``; the CHOOSER_COLUMNS and supplemental
            attribute columns used in the computation are dropped
        """

        trace_label = tracing.extend_trace_label(trace_label,
                                                 f'maz_tap_utils.{leg}')

        with chunk.chunk_log(trace_label):
            maz_tap_settings = \
                self.network_los.setting(f'TVPB_SETTINGS.{recipe}.maz_tap_settings.{mode}')
            chooser_columns = maz_tap_settings['CHOOSER_COLUMNS']
            attribute_columns = list(
                chooser_attributes.columns
            ) if chooser_attributes is not None else []
            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')

            if leg == 'access':
                maz_col = 'omaz'
                tap_col = 'btap'
            else:
                maz_col = 'dmaz'
                tap_col = 'atap'

            # maz_to_tap access/egress utilities
            # deduped utilities_df - one row per chooser for each boarding tap (btap) accessible from omaz
            utilities_df = self.network_los.maz_to_tap_dfs[mode]

            # expose the MAZ/TAP index as columns named for this leg
            utilities_df = utilities_df[chooser_columns]. \
                reset_index(drop=False). \
                rename(columns={'MAZ': maz_col, 'TAP': tap_col})
            # inner merge keeps only choosers whose maz has at least one tap
            utilities_df = pd.merge(maz_od_df[['idx',
                                               maz_col]].drop_duplicates(),
                                    utilities_df,
                                    on=maz_col,
                                    how='inner')

            if len(utilities_df) == 0:
                # nothing to trace when there are no rows
                trace = False

            # add any supplemental chooser attributes (e.g. demographic_segment, tod)
            for c in attribute_columns:
                utilities_df[c] = reindex(chooser_attributes[c],
                                          utilities_df['idx'])

            chunk.log_df(trace_label, "utilities_df", utilities_df)

            if self.units_for_recipe(recipe) == 'utility':

                utilities_df[leg] = compute_utilities(
                    self.network_los,
                    maz_tap_settings,
                    utilities_df,
                    model_constants=model_constants,
                    trace_label=trace_label,
                    trace=trace,
                    trace_column_names=['idx', maz_col, tap_col]
                    if trace else None)

                chunk.log_df(trace_label, "utilities_df",
                             utilities_df)  # annotated

            else:

                assignment_spec = \
                    assign.read_assignment_spec(file_name=config.config_file_path(maz_tap_settings['SPEC']))

                results, _, _ = assign.assign_variables(
                    assignment_spec, utilities_df, model_constants)
                # the spec must yield exactly one result column for this leg
                # (was `len(results.columns == 1)`, which only checked that
                # there was at least one column)
                assert len(results.columns) == 1, \
                    f"expected exactly one result column but got {list(results.columns)}"
                utilities_df[leg] = results

            chunk.log_df(trace_label, "utilities_df", utilities_df)

            if trace:
                self.trace_df(utilities_df, trace_label, 'utilities_df')

            # drop utility computation columns ('tod', 'demographic_segment' and maz_to_tap_df time/distance columns)
            utilities_df.drop(columns=attribute_columns + chooser_columns,
                              inplace=True)

        return utilities_df