Esempio n. 1
0
    def compute_tap_tap(self, recipe, maz_od_df, access_df, egress_df,
                        chooser_attributes, path_info, trace_label, trace):
        """
        Dispatch the tap-to-tap leg computation according to the units of the recipe.

        For a 'utility' recipe the tap cache is opened on first use, and then the
        utilities are either computed from scratch (when tracing) or looked up
        (when not tracing). Any other recipe must have units of 'time' and is
        handled by compute_tap_tap_time.

        Parameters
        ----------
        recipe
            passed to units_for_recipe and forwarded to the worker method
        maz_od_df
            only forwarded to lookup_tap_tap_utilities (untraced utility case)
        access_df
        egress_df
        chooser_attributes
        path_info
        trace_label
        trace
            when truthy, utilities are computed rather than looked up

        Returns
        -------
        whatever the selected worker method returns
        """

        # guard clause: everything that is not 'utility' has to be 'time'
        if self.units_for_recipe(recipe) != 'utility':
            assert self.units_for_recipe(recipe) == 'time'

            with memo("#TVPB compute_tap_tap_time"):
                time_df = self.compute_tap_tap_time(
                    recipe, access_df, egress_df, chooser_attributes,
                    path_info, trace_label, trace)
            return time_df

        # utility recipes rely on the tap cache, so make sure it is open
        if not self.tap_cache.is_open:
            with memo("#TVPB compute_tap_tap tap_cache.open"):
                self.tap_cache.open()

        if trace:
            return self.compute_tap_tap_utilities(
                recipe, access_df, egress_df, chooser_attributes,
                path_info, trace_label, trace)

        return self.lookup_tap_tap_utilities(
            recipe, maz_od_df, access_df, egress_df, chooser_attributes,
            path_info, trace_label)
Esempio n. 2
0
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, trace_label, trace):
        """
        Build transit_df for all access/egress tap pairs and assign a 'transit' value to each row
        using the assignment spec named in the recipe's tap_tap_settings.

        Rows whose 'transit' value is not strictly positive are treated as unavailable and dropped.

        Parameters
        ----------
        recipe: str
            key under TVPB_SETTINGS used to look up CONSTANTS and tap_tap_settings
        access_df
            forwarded to all_transit_paths
        egress_df
            forwarded to all_transit_paths
        chooser_attributes
            forwarded to all_transit_paths; its columns are removed from the result
        trace_label: str
        trace: boolean
            when truthy, the resulting transit_df is passed to trace_df

        Returns
        -------
        transit_df
            frame returned by all_transit_paths, restricted to rows with transit > 0,
            with a 'transit' column added and the chooser_attributes columns removed
        """

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        model_constants = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.CONSTANTS')
        tap_tap_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

        with memo("#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
            transit_df = self.all_transit_paths(access_df, egress_df,
                                                chooser_attributes,
                                                trace_label, trace)
            # note: transit_df index is arbitrary
            chunk.log_df(trace_label, "transit_df", transit_df)

        # names made available to the assignment expressions
        locals_d = {'los': self.network_los}
        locals_d.update(model_constants)

        assignment_spec = assign.read_assignment_spec(
            file_name=config.config_file_path(tap_tap_settings['SPEC']))

        results, _, _ = assign.assign_variables(assignment_spec, transit_df,
                                                locals_d)
        # the spec is expected to produce exactly one result column
        # (the closing parenthesis was previously misplaced, so the check only
        # verified that there was at least one column)
        assert len(results.columns) == 1
        transit_df['transit'] = results

        # filter out unavailable btap_atap pairs
        logger.debug(
            f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
        )
        transit_df = transit_df[transit_df.transit > 0]

        # rebind rather than drop in place: transit_df is now a filtered selection
        transit_df = transit_df.drop(columns=chooser_attributes.columns)

        chunk.log_df(trace_label, "transit_df", None)

        if trace:
            self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
Esempio n. 3
0
    def build_virtual_path(self,
                           recipe,
                           path_type,
                           orig,
                           dest,
                           tod,
                           demographic_segment,
                           want_choices,
                           trace_label,
                           filter_targets=None,
                           trace=False,
                           override_choices=None):
        """
        Build the access / tap-tap / egress legs for each requested orig-dest pair, pick the
        best paths, and return either path logsums (units 'utility') or times (units 'time').

        Parameters
        ----------
        recipe
            key under TVPB_SETTINGS in the network_los settings
        path_type
            key under TVPB_SETTINGS.<recipe>.path_types
        orig
            origin mazs; accessed via .index and .values and sliced by filter_targets
            (presumably a pandas.Series - confirm against callers)
        dest
            destination mazs; accessed via .values and sliced by filter_targets
        tod
            either a scalar str applying to every row, or a per-row object indexable
            by filter_targets and by .loc
        demographic_segment
            None, or a per-row object indexable by filter_targets and by .loc
        want_choices
            whether to also choose a path per row; only allowed when units is 'utility'
        trace_label
        filter_targets
            optional selector used to slice orig, dest, tod, demographic_segment and
            override_choices; must have at least one truthy element (checked via .any())
        trace
            tracing flag; requires filter_targets to be supplied
        override_choices
            choices from a previous call; when filter_targets is supplied it must be
            given if and only if want_choices is truthy

        Returns
        -------
        units == 'utility':
            DataFrame indexed like orig with a 'logsum' column, plus 'btap', 'atap',
            'path_set' and 'path_num' columns of the chosen path when want_choices
        units == 'time':
            Series of path_df time values re-aligned to maz_od_df.idx via the reindex
            helper, with missing values filled with 0.0
        """

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'build_virtual_path')

        # Tracing is implemented as a separate, second call that operates ONLY on filter_targets
        assert not (trace and filter_targets is None)
        if filter_targets is not None:
            assert filter_targets.any()

            # slice orig and dest
            orig = orig[filter_targets]
            dest = dest[filter_targets]
            assert len(orig) > 0
            assert len(dest) > 0

            # slice tod and demographic_segment if not scalar
            if not isinstance(tod, str):
                tod = tod[filter_targets]
            if demographic_segment is not None:
                demographic_segment = demographic_segment[filter_targets]
                assert len(demographic_segment) > 0

            # slice choices
            # (requires actual choices from the previous call lest rands change on second call)
            assert want_choices == (override_choices is not None)
            if want_choices:
                override_choices = override_choices[filter_targets]

        units = self.units_for_recipe(recipe)
        # NOTE(review): the assertion message below repeats the word 'supported'
        assert units == 'utility' or not want_choices, "'want_choices' only supported supported if units is utility"

        access_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.access')
        egress_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.egress')
        path_types_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
        attributes_as_columns = \
            self.network_los.setting(f'TVPB_SETTINGS.{recipe}.tap_tap_settings.attributes_as_columns', [])

        # scalar context passed down to compute_tap_tap (a scalar tod may be added below)
        path_info = {
            'path_type': path_type,
            'access_mode': access_mode,
            'egress_mode': egress_mode
        }

        # maz od pairs requested
        # 'idx' carries the (possibly duplicated) orig index; 'seq' is a unique 0-based row number
        with memo("#TVPB build_virtual_path maz_od_df"):
            maz_od_df = pd.DataFrame({
                'idx': orig.index.values,
                'omaz': orig.values,
                'dmaz': dest.values,
                'seq': range(len(orig))
            })
            chunk.log_df(trace_label, "maz_od_df", maz_od_df)
            self.trace_maz_tap(maz_od_df, access_mode, egress_mode)

        # for location choice, there will be multiple alt dest rows per chooser and duplicate orig.index values
        # but tod and demographic_segment should be the same for all chooser rows (unique orig index values)
        # knowing this allows us to eliminate redundant computations (e.g. utilities of maz_tap pairs)
        duplicated = orig.index.duplicated(keep='first')
        chooser_attributes = pd.DataFrame(index=orig.index[~duplicated])
        # tod is either a per-chooser column, a broadcast scalar column, or a scalar in path_info
        if not isinstance(tod, str):
            chooser_attributes['tod'] = tod.loc[~duplicated]
        elif 'tod' in attributes_as_columns:
            chooser_attributes['tod'] = tod
        else:
            path_info['tod'] = tod
        if demographic_segment is not None:
            chooser_attributes[
                'demographic_segment'] = demographic_segment.loc[~duplicated]

        with memo("#TVPB build_virtual_path access_df"):
            access_df = self.compute_maz_tap_utilities(recipe,
                                                       maz_od_df,
                                                       chooser_attributes,
                                                       leg='access',
                                                       mode=access_mode,
                                                       trace_label=trace_label,
                                                       trace=trace)
        chunk.log_df(trace_label, "access_df", access_df)

        with memo("#TVPB build_virtual_path egress_df"):
            egress_df = self.compute_maz_tap_utilities(recipe,
                                                       maz_od_df,
                                                       chooser_attributes,
                                                       leg='egress',
                                                       mode=egress_mode,
                                                       trace_label=trace_label,
                                                       trace=trace)
        chunk.log_df(trace_label, "egress_df", egress_df)

        # path_info for use by expressions (e.g. penalty for drive access if no parking at access tap)
        with memo("#TVPB build_virtual_path compute_tap_tap"):
            transit_df = self.compute_tap_tap(recipe,
                                              maz_od_df,
                                              access_df,
                                              egress_df,
                                              chooser_attributes,
                                              path_info=path_info,
                                              trace_label=trace_label,
                                              trace=trace)
        chunk.log_df(trace_label, "transit_df", transit_df)

        with memo("#TVPB build_virtual_path best_paths"):
            path_df = self.best_paths(recipe, path_type, maz_od_df, access_df,
                                      egress_df, transit_df, trace_label,
                                      trace)
        chunk.log_df(trace_label, "path_df", path_df)

        # now that we have created path_df, we are done with the dataframes for the separate legs
        del access_df
        chunk.log_df(trace_label, "access_df", None)
        del egress_df
        chunk.log_df(trace_label, "egress_df", None)
        del transit_df
        chunk.log_df(trace_label, "transit_df", None)

        if units == 'utility':

            # logsums
            with memo("#TVPB build_virtual_path logsums"):
                # one row per seq with utilities in columns
                # path_num 0-based to align with logit.make_choices 0-based choice indexes
                path_df['path_num'] = path_df.groupby('seq').cumcount()
                chunk.log_df(trace_label, "path_df", path_df)

                utilities_df = path_df[['seq', 'path_num',
                                        units]].set_index(['seq', 'path_num'
                                                           ]).unstack()
                utilities_df.columns = utilities_df.columns.droplevel(
                )  # for legibility

                # add rows missing because no access or egress availability
                utilities_df = pd.concat(
                    [pd.DataFrame(index=maz_od_df.seq), utilities_df], axis=1)
                utilities_df = utilities_df.fillna(
                    UNAVAILABLE
                )  # set utilities for missing paths to UNAVAILABLE

                chunk.log_df(trace_label, "utilities_df", utilities_df)

                with warnings.catch_warnings(record=True) as w:
                    # Cause all warnings to always be triggered.
                    # most likely "divide by zero encountered in log" caused by all transit sets non-viable
                    warnings.simplefilter("always")

                    # utilities are scaled by the nesting coefficient (default 1) before exponentiation
                    paths_nest_nesting_coefficient = path_types_settings.get(
                        'paths_nest_nesting_coefficient', 1)
                    exp_utilities = np.exp(utilities_df.values /
                                           paths_nest_nesting_coefficient)
                    # floor the logsum at UNAVAILABLE (e.g. log(0) = -inf when every path is unavailable)
                    logsums = np.maximum(
                        np.log(np.nansum(exp_utilities, axis=1)), UNAVAILABLE)

                    # re-emit any captured warnings through the logger
                    if len(w) > 0:
                        for wrn in w:
                            logger.warning(
                                f"{trace_label} - {type(wrn).__name__} ({wrn.message})"
                            )

                        # debugging aid, disabled by default
                        DUMP = False
                        if DUMP:
                            zero_utilities_df = utilities_df[np.nansum(
                                np.exp(utilities_df.values), axis=1) == 0]
                            zero_utilities_df.to_csv(config.output_file_path(
                                'warning_utilities_df.csv'),
                                                     index=True)
                            # NOTE(review): 'bug' is not defined in this block, so this line would raise
                            # NameError after the dump - presumably a deliberate hard stop; confirm
                            bug

            if want_choices:

                # orig index to identify appropriate random number channel to use making choices
                utilities_df.index = orig.index

                with memo("#TVPB build_virtual_path make_choices"):

                    probs = logit.utils_to_probs(utilities_df,
                                                 allow_zero_probs=True,
                                                 trace_label=trace_label)
                    chunk.log_df(trace_label, "probs", probs)

                    if trace:
                        # when tracing, reuse the choices made by the previous (untraced) call
                        choices = override_choices

                        utilities_df['choices'] = choices
                        self.trace_df(utilities_df, trace_label,
                                      'utilities_df')

                        probs['choices'] = choices
                        self.trace_df(probs, trace_label, 'probs')
                    else:

                        choices, rands = logit.make_choices(
                            probs,
                            allow_bad_probs=True,
                            trace_label=trace_label)

                        chunk.log_df(trace_label, "rands", rands)
                        del rands
                        chunk.log_df(trace_label, "rands", None)

                    del probs
                    chunk.log_df(trace_label, "probs", None)

                # we need to get path_set, btap, atap from path_df row with same seq and path_num
                # drop seq join column, but keep path_num of choice to override_choices when tracing
                columns_to_cache = ['btap', 'atap', 'path_set', 'path_num']
                logsum_df = \
                    pd.merge(pd.DataFrame({'seq': range(len(orig)), 'path_num': choices.values}),
                             path_df[['seq'] + columns_to_cache],
                             on=['seq', 'path_num'], how='left')\
                    .drop(columns=['seq'])\
                    .set_index(orig.index)

                logsum_df['logsum'] = logsums

            else:

                # no choices requested: result is just the logsum per requested row
                assert len(logsums) == len(orig)
                logsum_df = pd.DataFrame({'logsum': logsums}, index=orig.index)

            chunk.log_df(trace_label, "logsum_df", logsum_df)

            del utilities_df
            chunk.log_df(trace_label, "utilities_df", None)

            if trace:
                self.trace_df(logsum_df, trace_label, 'logsum_df')

            chunk.log_df(trace_label, "logsum_df", logsum_df)
            results = logsum_df

        else:
            assert units == 'time'

            # return a series
            results = pd.Series(path_df[units].values, index=path_df['idx'])

            # zero-fill rows for O-D pairs where no best path exists because there was no tap-tap transit availability
            results = reindex(results, maz_od_df.idx).fillna(0.0)

            chunk.log_df(trace_label, "results", results)

        # one result row per requested orig-dest pair
        assert len(results) == len(orig)

        del path_df
        chunk.log_df(trace_label, "path_df", None)

        # diagnostic
        # maz_od_df['DIST'] = self.network_los.get_default_skim_dict().get('DIST').get(maz_od_df.omaz, maz_od_df.dmaz)
        # maz_od_df[units] = results.logsum if units == 'utility' else results.values
        # print(f"maz_od_df\n{maz_od_df}")

        return results
Esempio n. 4
0
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, path_info, trace_label,
                             trace):
        """
        Build transit_df for all access/egress tap pairs and assign a 'transit' value to each row
        using the assignment spec named in the recipe's tap_tap_settings.

        The spec is evaluated once per unique (btap, atap) pair and the result is mapped
        back onto every row of transit_df. Rows whose 'transit' value is not strictly
        positive are treated as unavailable and dropped.

        Parameters
        ----------
        recipe: str
            key under TVPB_SETTINGS used to look up CONSTANTS and tap_tap_settings
        access_df
            forwarded to all_transit_paths
        egress_df
            forwarded to all_transit_paths
        chooser_attributes
            forwarded to all_transit_paths; its columns are removed from the result
        path_info: dict
            copied and used as the starting point for the expression locals
        trace_label: str
        trace: boolean
            when truthy, the resulting transit_df is passed to trace_df

        Returns
        -------
        transit_df
            frame returned by all_transit_paths, restricted to rows with transit > 0,
            with a 'transit' column added and the chooser_attributes columns removed
        """

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        with chunk.chunk_log(trace_label):

            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')
            tap_tap_settings = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

            with memo(
                    "#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df, egress_df,
                                                    chooser_attributes,
                                                    trace_label, trace)
                # note: transit_df index is arbitrary
                chunk.log_df(trace_label, "transit_df", transit_df)

            # some expressions may want to know access mode -
            locals_dict = path_info.copy()
            locals_dict['los'] = self.network_los
            locals_dict.update(model_constants)

            assignment_spec = assign.read_assignment_spec(
                file_name=config.config_file_path(tap_tap_settings['SPEC']))

            DEDUPE = True
            if DEDUPE:

                # assign uid for reduping
                # (btap * (max atap + 1) + atap is unique per btap/atap pair)
                # NOTE(review): the uid ignores the chooser attribute columns, so rows that share
                # a tap pair but differ in chooser attributes all receive the value computed for
                # the first such row - confirm the spec does not depend on chooser attributes
                max_atap = transit_df.atap.max() + 1
                transit_df[
                    'uid'] = transit_df.btap * max_atap + transit_df.atap

                # dedupe
                chooser_attribute_columns = list(chooser_attributes.columns)
                unique_transit_df = \
                    transit_df.loc[~transit_df.uid.duplicated(), ['btap', 'atap', 'uid'] + chooser_attribute_columns]
                unique_transit_df.set_index('uid', inplace=True)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)

                logger.debug(
                    f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}"
                )

                # assign_variables
                results, _, _ = assign.assign_variables(
                    assignment_spec, unique_transit_df, locals_dict)
                # the spec is expected to produce exactly one result column
                # (the closing parenthesis was previously misplaced, so the check only
                # verified that there was at least one column)
                assert len(results.columns) == 1
                unique_transit_df['transit'] = results

                # redupe results back into transit_df
                with memo("#TVPB compute_tap_tap_time redupe transit_df"):
                    transit_df['transit'] = reindex(unique_transit_df.transit,
                                                    transit_df.uid)

                del transit_df['uid']
                del unique_transit_df
                chunk.log_df(trace_label, "transit_df", transit_df)
                chunk.log_df(trace_label, "unique_transit_df", None)

            else:
                results, _, _ = assign.assign_variables(
                    assignment_spec, transit_df, locals_dict)
                assert len(results.columns) == 1
                transit_df['transit'] = results

            # filter out unavailable btap_atap pairs
            logger.debug(
                f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
            )
            transit_df = transit_df[transit_df.transit > 0]

            # rebind rather than drop in place: transit_df is now a filtered selection
            transit_df = transit_df.drop(columns=chooser_attributes.columns)

            chunk.log_df(trace_label, "transit_df", None)

            if trace:
                self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
Esempio n. 5
0
    def lookup_tap_tap_utilities(self, recipe, maz_od_df, access_df, egress_df,
                                 chooser_attributes, path_info, trace_label):
        """
        create transit_df and compute utilities for all atap-btap pairs between omaz in access and dmaz in egress_df
        look up the utilities in the precomputed tap_cache data (which is indexed by uid_calculator unique_ids)
        (unique_id can be used as a zero-based index into the data array)

        transit_df contains all possible access omaz/btap to egress dmaz/atap transit path pairs for each chooser

        Parameters
        ----------
        recipe
            not used in this method; attribute_segments are read from the
            tour_mode_choice settings regardless of recipe
        maz_od_df
            only used for the TRACE_COMPLEXITY diagnostic (needs 'idx', 'omaz', 'dmaz' columns)
        access_df
            forwarded to all_transit_paths
        egress_df
            forwarded to all_transit_paths
        chooser_attributes
            forwarded to all_transit_paths
        path_info: dict
            source of scalar attribute values for segments that are not transit_df columns
        trace_label

        Returns
        -------
        transit_df
            'idx', 'btap', 'atap' columns plus one utility column per uid_calculator.set_names entry,
            indexed by unique id
        """

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'lookup_tap_tap_utils')

        with chunk.chunk_log(trace_label):

            with memo(
                    "#TVPB CACHE lookup_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df,
                                                    egress_df,
                                                    chooser_attributes,
                                                    trace_label,
                                                    trace=False)
                # note: transit_df index is arbitrary
                chunk.log_df(trace_label, "transit_df", transit_df)

            if TRACE_COMPLEXITY:
                # diagnostic: log the omaz,dmaz pairs with the greatest number of virtual tap-tap paths
                num_paths = transit_df.groupby(['idx']).size().to_frame('n')
                num_paths = pd.merge(maz_od_df,
                                     num_paths,
                                     left_on='idx',
                                     right_index=True)
                num_paths = num_paths[[
                    'omaz', 'dmaz', 'n'
                ]].drop_duplicates(subset=['omaz', 'dmaz'])
                num_paths = num_paths.sort_values(
                    'n', ascending=False).reset_index(drop=True)
                logger.debug(f"num_paths\n{num_paths.head(10)}")

            # FIXME some expressions may want to know access mode -
            locals_dict = path_info.copy()

            # add uid column to transit_df
            with memo("#TVPB lookup_tap_tap_utilities assign uid"):
                # NOTE(review): settings key is hard-coded to tour_mode_choice rather than using
                # recipe - presumably to match how uid_calculator was built; confirm
                attribute_segments = \
                    self.network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attribute_segments')
                # segment attributes that are not per-row columns must be supplied as scalars
                scalar_attributes = {
                    k: locals_dict[k]
                    for k in attribute_segments if k not in transit_df
                }

                transit_df.index = self.uid_calculator.get_unique_ids(
                    transit_df, scalar_attributes)
                transit_df = transit_df[[
                    'idx', 'btap', 'atap'
                ]]  # just needed chooser_columns for uid calculation
                chunk.log_df(trace_label, "transit_df add uid index",
                             transit_df)

            with memo("#TVPB lookup_tap_tap_utilities reindex transit_df"):
                utilities = self.tap_cache.data
                # column i of the cache data holds the utilities for the i-th set name
                for i, column_name in enumerate(self.uid_calculator.set_names):
                    transit_df[column_name] = utilities[
                        transit_df.index.values, i]

            # run the null check only when error checking is enabled
            # (previously 'assert ERR_CHECK and ...' failed unconditionally when the flag was off)
            if ERR_CHECK:
                for c in self.uid_calculator.set_names:
                    assert not transit_df[c].isnull().any()

            chunk.log_df(trace_label, "transit_df", None)

        return transit_df
Esempio n. 6
0
    def compute_tap_tap_utilities(self, recipe, access_df, egress_df,
                                  chooser_attributes, path_info, trace_label,
                                  trace):
        """
        create transit_df and compute utilities for all atap-btap pairs between omaz in access and dmaz in egress_df
        compute the utilities using the tap_tap utility expressions file specified in tap_tap_settings

        transit_df contains all possible access omaz/btap to egress dmaz/atap transit path pairs for each chooser

        trace should be True as we don't encourage/support dynamic utility computation except when tracing
        (precompute being fairly fast)

        Parameters
        ----------
        recipe: str
           'recipe' key in network_los.yaml TVPB_SETTINGS e.g. tour_mode_choice
        access_df: pandas.DataFrame
            dataframe with 'idx' and 'omaz' columns
        egress_df: pandas.DataFrame
            dataframe with 'idx' and 'dmaz' columns
        chooser_attributes: pandas.DataFrame
            per-chooser attributes; its column names are added to the chooser columns
        path_info: dict
            copied and merged with the recipe CONSTANTS to form the expression locals
        trace_label: str
        trace: boolean
            must be truthy (asserted)

        Returns
        -------
        transit_df: pandas.dataframe
            'idx', 'btap', 'atap' columns plus the utility columns merged in from compute_utilities
        """

        assert trace

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_utils')

        with chunk.chunk_log(trace_label):

            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')
            tap_tap_settings = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

            with memo(
                    "#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df, egress_df,
                                                    chooser_attributes,
                                                    trace_label, trace)
                # note: transit_df index is arbitrary
            chunk.log_df(trace_label, "transit_df", transit_df)

            # FIXME some expressions may want to know access mode -
            locals_dict = path_info.copy()
            locals_dict.update(model_constants)

            # columns needed for compute_utilities
            chooser_columns = ['btap', 'atap'] + list(
                chooser_attributes.columns)

            # deduplicate transit_df to unique_transit_df
            with memo(
                    "#TVPB compute_tap_tap_utilities deduplicate transit_df"):

                # NOTE(review): settings key is hard-coded to tour_mode_choice rather than using
                # recipe - presumably to match how uid_calculator was built; confirm
                attribute_segments = \
                    self.network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attribute_segments')
                # segment attributes that are not per-row columns must be supplied as scalars
                scalar_attributes = {
                    k: locals_dict[k]
                    for k in attribute_segments if k not in transit_df
                }

                transit_df['uid'] = self.uid_calculator.get_unique_ids(
                    transit_df, scalar_attributes)

                unique_transit_df = transit_df.loc[
                    ~transit_df.uid.duplicated(), chooser_columns + ['uid']]
                logger.debug(
                    f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}"
                )

                unique_transit_df.set_index('uid', inplace=True)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)

                transit_df = transit_df[['idx', 'btap', 'atap',
                                         'uid']]  # don't need chooser columns
                chunk.log_df(trace_label, "transit_df", transit_df)

            logger.debug(
                f"#TVPB CACHE compute_tap_tap_utilities dedupe transit_df "
                f"from {len(transit_df)} to {len(unique_transit_df)} rows")

            num_unique_transit_rows = len(unique_transit_df)  # errcheck
            logger.debug(
                f"#TVPB CACHE compute_tap_tap_utilities compute_utilities for {len(unique_transit_df)} rows"
            )

            with memo("#TVPB compute_tap_tap_utilities compute_utilities"):
                unique_utilities_df = compute_utilities(
                    self.network_los,
                    tap_tap_settings,
                    choosers=unique_transit_df,
                    model_constants=locals_dict,
                    trace_label=trace_label,
                    trace=trace,
                    trace_column_names=chooser_columns if trace else None)
                chunk.log_df(trace_label, "unique_utilities_df",
                             unique_utilities_df)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)  # annotated

                if trace:
                    # combine unique_transit_df with unique_utilities_df for legibility
                    omnibus_df = pd.merge(unique_transit_df,
                                          unique_utilities_df,
                                          left_index=True,
                                          right_index=True,
                                          how='left')
                    self.trace_df(omnibus_df, trace_label,
                                  'unique_utilities_df')
                    chunk.log_df(trace_label, "omnibus_df", omnibus_df)
                    del omnibus_df
                    chunk.log_df(trace_label, "omnibus_df", None)

            # one utility row is expected per unique transit row
            assert num_unique_transit_rows == len(
                unique_utilities_df)  # errcheck

            # redupe unique_transit_df back into transit_df
            with memo("#TVPB compute_tap_tap_utilities redupe transit_df"):

                # note: pd.merge is called with its default 'how' here, and merging on a
                # column does not preserve the left index,
                # but transit_df index is arbitrary so no need to restore
                transit_df = pd.merge(transit_df,
                                      unique_utilities_df,
                                      left_on='uid',
                                      right_index=True)
                del transit_df['uid']

                chunk.log_df(trace_label, "transit_df", transit_df)

            # run the null check only when error checking is enabled
            # (previously 'assert ERR_CHECK and ...' failed unconditionally when the flag was off)
            if ERR_CHECK:
                for c in unique_utilities_df:
                    assert not transit_df[c].isnull().any()

            if len(unique_transit_df) > 0:
                # if all rows were cached, then unique_utilities_df is just a ref to cache
                del unique_utilities_df
                chunk.log_df(trace_label, "unique_utilities_df", None)

            chunk.log_df(trace_label, "transit_df", None)

            if trace:
                self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df