Example #1
def initialize_households():

    trace_label = 'initialize_households'

    with chunk.chunk_log(trace_label, base=True):

        chunk.log_rss(f"{trace_label}.inside-yield")

        households = inject.get_table('households').to_frame()
        assert not households._is_view
        chunk.log_df(trace_label, "households", households)
        del households
        chunk.log_df(trace_label, "households", None)

        persons = inject.get_table('persons').to_frame()
        assert not persons._is_view
        chunk.log_df(trace_label, "persons", persons)
        del persons
        chunk.log_df(trace_label, "persons", None)

        model_settings = config.read_model_settings(
            'initialize_households.yaml', mandatory=True)
        annotate_tables(model_settings, trace_label)

        # - initialize shadow_pricing size tables after annotating household and person tables
        # since these are scaled to model size, they have to be created while single-process
        # this can now be called as a stand alone model step instead, add_size_tables
        add_size_tables = model_settings.get('add_size_tables', True)
        if add_size_tables:
            # warnings.warn(f"Calling add_size_tables from initialize will be removed in the future.", FutureWarning)
            shadow_pricing.add_size_tables()

        # - preload person_windows
        person_windows = inject.get_table('person_windows').to_frame()
        chunk.log_df(trace_label, "person_windows", person_windows)
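
A minimal sketch of the register/release pattern used above, assuming the activitysim-style chunk and inject modules shown in these examples (the helper and table name are illustrative, not ActivitySim source):

# hypothetical helper illustrating the log_df lifecycle (sketch only)
from activitysim.core import chunk, inject

def load_and_release(trace_label, table_name):
    df = inject.get_table(table_name).to_frame()  # materialize a copy, not a view
    chunk.log_df(trace_label, table_name, df)     # register the frame with the chunk logger
    # ... work with df ...
    del df                                        # drop the reference
    chunk.log_df(trace_label, table_name, None)   # tell the logger its memory is released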
Example #2
    def best_paths(self, recipe, path_type, maz_od_df, access_df, egress_df, transit_df, trace_label, trace=False):

        trace_label = tracing.extend_trace_label(trace_label, 'best_paths')

        path_settings = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
        max_paths_per_tap_set = path_settings.get('max_paths_per_tap_set', 1)
        max_paths_across_tap_sets = path_settings.get('max_paths_across_tap_sets', 1)

        units = self.units_for_recipe(recipe)
        smaller_is_better = (units in ['time'])

        maz_od_df['seq'] = maz_od_df.index
        # maz_od_df has one row per chooser
        # inner join to add rows for each access, egress, and transit segment combination
        path_df = maz_od_df. \
            merge(access_df, on=['idx', 'omaz'], how='inner'). \
            merge(egress_df, on=['idx', 'dmaz'], how='inner'). \
            merge(transit_df, on=['idx', 'atap', 'btap'], how='inner')

        chunk.log_df(trace_label, "path_df", path_df)

        # transit sets are the transit_df non-join columns
        transit_sets = [c for c in transit_df.columns if c not in ['idx', 'atap', 'btap']]

        if trace:
            # be nice and show both tap_tap set utility and total_set = access + set + egress
            for c in transit_sets:
                path_df[f'total_{c}'] = path_df[c] + path_df['access'] + path_df['egress']
            self.trace_df(path_df, trace_label, 'best_paths.full')
            for c in transit_sets:
                del path_df[f'total_{c}']

        for c in transit_sets:
            path_df[c] = path_df[c] + path_df['access'] + path_df['egress']
        path_df.drop(columns=['access', 'egress'], inplace=True)

        # choose best paths by tap set
        best_paths_list = []
        for c in transit_sets:
            keep = path_df.index.isin(
                path_df[['seq', c]].sort_values(by=c, ascending=smaller_is_better).
                groupby(['seq']).head(max_paths_per_tap_set).index
            )

            best_paths_for_set = path_df[keep]
            best_paths_for_set['path_set'] = c  # remember the path set
            best_paths_for_set[units] = path_df[keep][c]
            best_paths_for_set.drop(columns=transit_sets, inplace=True)
            best_paths_list.append(best_paths_for_set)

        path_df = pd.concat(best_paths_list)

        # choose best paths overall by seq
        path_df = path_df.sort_values(by=['seq', units], ascending=[True, smaller_is_better])
        path_df = path_df[path_df.index.isin(path_df.groupby(['seq']).head(max_paths_across_tap_sets).index)]

        if trace:
            self.trace_df(path_df, trace_label, 'best_paths')

        return path_df
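
The "keep the N best paths per chooser" step above is the standard sort_values + groupby().head(N) idiom; a self-contained illustration on toy data (column names are made up, not the TVPB tables):

import pandas as pd

# toy path table: 'seq' identifies the chooser, 'time' is the path impedance
path_df = pd.DataFrame({'seq': [0, 0, 0, 1, 1],
                        'time': [12.0, 9.5, 11.0, 7.0, 8.5]})

smaller_is_better = True   # units == 'time'
max_paths = 2              # e.g. max_paths_per_tap_set

# sort so the best rows come first, keep the head of each seq group,
# then mask the original frame with the surviving index values
keep = path_df.index.isin(
    path_df[['seq', 'time']].sort_values(by='time', ascending=smaller_is_better)
    .groupby('seq').head(max_paths).index)
best_paths = path_df[keep]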
Example #3
def run_trip_scheduling(
        trips,
        tours,
        probs_spec,
        model_settings,
        estimator,
        is_last_iteration,
        chunk_size,
        chunk_tag,
        trace_hh_id,
        trace_label):

    # only non-initial trips require scheduling; the segment handling the first such trip in a tour will use the most space
    # is_outbound_chooser = (trips.trip_num > 1) & trips.outbound & (trips.primary_purpose != 'atwork')
    # is_inbound_chooser = (trips.trip_num < trips.trip_count) & ~trips.outbound & (trips.primary_purpose != 'atwork')
    # num_choosers = (is_inbound_chooser | is_outbound_chooser).sum()

    result_list = []
    for i, trips_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers_by_chunk_id(trips, chunk_size, trace_label, chunk_tag):

        if trips_chunk.outbound.any():
            leg_chunk = trips_chunk[trips_chunk.outbound]
            leg_trace_label = tracing.extend_trace_label(chunk_trace_label, 'outbound')
            choices = \
                schedule_trips_in_leg(
                    outbound=True,
                    trips=leg_chunk,
                    probs_spec=probs_spec,
                    model_settings=model_settings,
                    is_last_iteration=is_last_iteration,
                    trace_hh_id=trace_hh_id,
                    trace_label=leg_trace_label)
            result_list.append(choices)

            chunk.log_df(trace_label, f'result_list', result_list)

        if (~trips_chunk.outbound).any():
            leg_chunk = trips_chunk[~trips_chunk.outbound]
            leg_trace_label = tracing.extend_trace_label(chunk_trace_label, 'inbound')
            choices = \
                schedule_trips_in_leg(
                    outbound=False,
                    trips=leg_chunk,
                    probs_spec=probs_spec,
                    model_settings=model_settings,
                    is_last_iteration=is_last_iteration,
                    trace_hh_id=trace_hh_id,
                    trace_label=leg_trace_label)
            result_list.append(choices)

            chunk.log_df(trace_label, f'result_list', result_list)

    choices = pd.concat(result_list)

    return choices
Example #4
def run_trip_scheduling(trips_chunk, tours, probs_spec, model_settings,
                        estimator, is_last_iteration, chunk_size, trace_hh_id,
                        trace_label):

    set_tour_hour(trips_chunk, tours)
    set_stop_num(trips_chunk)

    # only non-initial trips require scheduling; the segment handling the first such trip in a tour will use the most space
    # is_outbound_chooser = (trips.trip_num > 1) & trips.outbound & (trips.primary_purpose != 'atwork')
    # is_inbound_chooser = (trips.trip_num < trips.trip_count) & ~trips.outbound & (trips.primary_purpose != 'atwork')
    # num_choosers = (is_inbound_chooser | is_outbound_chooser).sum()

    result_list = []

    if trips_chunk.outbound.any():
        leg_chunk = trips_chunk[trips_chunk.outbound]
        leg_trace_label = tracing.extend_trace_label(trace_label, 'outbound')
        choices = \
            schedule_trips_in_leg(
                outbound=True,
                trips=leg_chunk,
                probs_spec=probs_spec,
                model_settings=model_settings,
                is_last_iteration=is_last_iteration,
                trace_hh_id=trace_hh_id,
                trace_label=leg_trace_label)
        result_list.append(choices)

        chunk.log_df(trace_label, f'result_list', result_list)

        # departure time of last outbound trips must constrain
        # departure times for initial inbound trips
        update_tour_earliest(trips_chunk, choices)

    if (~trips_chunk.outbound).any():
        leg_chunk = trips_chunk[~trips_chunk.outbound]
        leg_trace_label = tracing.extend_trace_label(trace_label, 'inbound')
        choices = \
            schedule_trips_in_leg(
                outbound=False,
                trips=leg_chunk,
                probs_spec=probs_spec,
                model_settings=model_settings,
                is_last_iteration=is_last_iteration,
                trace_hh_id=trace_hh_id,
                trace_label=leg_trace_label)
        result_list.append(choices)

        chunk.log_df(trace_label, f'result_list', result_list)

    choices = pd.concat(result_list)

    return choices
Example #5
def initialize_landuse():

    trace_label = 'initialize_landuse'

    with chunk.chunk_log(trace_label, base=True):

        model_settings = config.read_model_settings('initialize_landuse.yaml',
                                                    mandatory=True)

        annotate_tables(model_settings, trace_label)

        # instantiate accessibility (must be checkpointed to be used to slice accessibility)
        accessibility = pipeline.get_table('accessibility')
        chunk.log_df(trace_label, "accessibility", accessibility)
Example #6
def compute_utilities_for_attribute_tuple(network_los, scalar_attributes, data, chunk_size, trace_label):

    # scalar_attributes is a dict of attribute name/value pairs for this combination
    # (e.g. {'demographic_segment': 0, 'tod': 'AM', 'access_mode': 'walk'})

    logger.info(f"{trace_label} scalar_attributes: {scalar_attributes}")

    uid_calculator = network_los.tvpb.uid_calculator

    attributes_as_columns = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])
    model_settings = \
        network_los.setting(f'TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')
    model_constants = \
        network_los.setting(f'TVPB_SETTINGS.tour_mode_choice.CONSTANTS').copy()
    model_constants.update(scalar_attributes)

    data = data.reshape(uid_calculator.fully_populated_shape)

    # get od skim_offset dataframe with uid index corresponding to scalar_attributes
    choosers_df = uid_calculator.get_od_dataframe(scalar_attributes)

    # choosers_df is pretty big and was custom made for compute_utilities but we don't need to chunk_log it
    # since it is created outside of adaptive_chunked_choosers and so will show up in baseline
    assert not chunk.chunk_logging()  # otherwise we should chunk_log this

    chunk_tag = 'initialize_tvpb'  # all attribute_combinations can use same cached data for row_size calc

    for i, chooser_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers(choosers_df, chunk_size, trace_label, chunk_tag=chunk_tag):
        # we should count choosers_df as chunk overhead since it's pretty big and was custom-made for compute_utilities
        assert chooser_chunk._is_view  # otherwise copying it is wasteful
        chooser_chunk = chooser_chunk.copy()
        chunk.log_df(trace_label, 'attribute_chooser_chunk', chooser_chunk)

        # add any attribute columns specified as column attributes in settings (the rest will be scalars in locals_dict)
        for attribute_name in attributes_as_columns:
            chooser_chunk[attribute_name] = scalar_attributes[attribute_name]

        chunk.log_df(trace_label, 'attribute_chooser_chunk', chooser_chunk)

        utilities_df = \
            pathbuilder.compute_utilities(network_los,
                                          model_settings=model_settings,
                                          choosers=chooser_chunk,
                                          model_constants=model_constants,
                                          trace_label=trace_label)

        chunk.log_df(trace_label, 'utilities_df', utilities_df)

        assert len(utilities_df) == len(chooser_chunk)
        assert len(utilities_df.columns) == data.shape[1]
        assert not any_uninitialized(utilities_df.values)

        data[chooser_chunk.index.values, :] = utilities_df.values

        del chooser_chunk
        chunk.log_df(trace_label, 'attribute_chooser_chunk', None)

    logger.debug(f"{trace_label} updated utilities")
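
Each chunk's utilities are scattered back into the preallocated data buffer using the chooser index values as row offsets; a toy illustration of that write (shapes and values invented):

import numpy as np
import pandas as pd

# stand-in for the fully populated utilities buffer: one row per uid, one column per utility
data = np.full((5, 2), np.nan)

# a chooser chunk whose integer index values are row offsets into `data`
chooser_chunk = pd.DataFrame({'x': [1.0, 2.0]}, index=[3, 0])
utilities_df = pd.DataFrame({'u1': [0.1, 0.2], 'u2': [0.3, 0.4]}, index=chooser_chunk.index)

# scatter this chunk's results into the right rows of the shared buffer
data[chooser_chunk.index.values, :] = utilities_df.values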
Example #7
def choose_intermediate_trip_purpose(trips, probs_spec, trace_hh_id, trace_label):
    """
    choose purpose for intermediate trips based on probs_spec
    which assigns relative weights (summing to 1) to the possible purpose choices

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    probs_join_cols = ['primary_purpose', 'outbound', 'person_type']
    non_purpose_cols = probs_join_cols + ['depart_range_start', 'depart_range_end']
    purpose_cols = [c for c in probs_spec.columns if c not in non_purpose_cols]

    num_trips = len(trips.index)
    have_trace_targets = trace_hh_id and tracing.has_trace_targets(trips)

    # probs should sum to 1 across rows
    sum_probs = probs_spec[purpose_cols].sum(axis=1)
    probs_spec.loc[:, purpose_cols] = probs_spec.loc[:, purpose_cols].div(sum_probs, axis=0)

    # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
    choosers = pd.merge(trips.reset_index(), probs_spec, on=probs_join_cols,
                        how='left').set_index('trip_id')

    chunk.log_df(trace_label, 'choosers', choosers)

    # select the matching depart range (this should result in exactly one chooser row per trip)
    choosers = choosers[(choosers.start >= choosers['depart_range_start']) & (
                choosers.start <= choosers['depart_range_end'])]

    # choosers should now match trips row for row
    assert choosers.index.is_unique
    assert len(choosers.index) == num_trips

    choices, rands = logit.make_choices(
        choosers[purpose_cols],
        trace_label=trace_label, trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(choices, '%s.choices' % trace_label, columns=[None, 'trip_purpose'])
        tracing.trace_df(rands, '%s.rands' % trace_label, columns=[None, 'rand'])

    choices = choices.map(pd.Series(purpose_cols))
    return choices
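
The final `choices.map(pd.Series(purpose_cols))` call converts the positional choice returned by the logit draw into the purpose label sitting in that column position; a small illustration with made-up purposes:

import pandas as pd

purpose_cols = ['shopping', 'othmaint', 'eatout']   # made-up purpose columns

# positional choices as returned by the multinomial draw (0-based column positions)
choices = pd.Series([2, 0, 1], index=[101, 102, 103])

# pd.Series(purpose_cols) maps position -> label, so .map() yields the purpose strings
labels = choices.map(pd.Series(purpose_cols))
# 101 -> 'eatout', 102 -> 'shopping', 103 -> 'othmaint'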
Example #8
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, trace_label, trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        model_constants = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.CONSTANTS')
        tap_tap_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

        with memo("#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
            transit_df = self.all_transit_paths(access_df, egress_df,
                                                chooser_attributes,
                                                trace_label, trace)
            # note: transit_df index is arbitrary
            chunk.log_df(trace_label, "transit_df", transit_df)

        locals_d = {'los': self.network_los}
        locals_d.update(model_constants)

        assignment_spec = assign.read_assignment_spec(
            file_name=config.config_file_path(tap_tap_settings['SPEC']))

        results, _, _ = assign.assign_variables(assignment_spec, transit_df,
                                                locals_d)
        assert len(results.columns) == 1
        transit_df['transit'] = results

        # filter out unavailable btap_atap pairs
        logger.debug(
            f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
        )
        transit_df = transit_df[transit_df.transit > 0]

        transit_df.drop(columns=chooser_attributes.columns, inplace=True)

        chunk.log_df(trace_label, "transit_df", None)

        if trace:
            self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
Example #9
def annotate_tables(model_settings, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'annotate_tables')

    chunk.log_rss(trace_label)

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning(
            f"{trace_label} - annotate_tables setting is empty - nothing to do!"
        )

    assert isinstance(annotate_tables, list), \
        f"annotate_tables settings should be a list but is {type(annotate_tables)}"

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']

        chunk.log_rss(f"{trace_label}.pre-get_table.{tablename}")

        df = inject.get_table(tablename).to_frame()
        chunk.log_df(trace_label, tablename, df)

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(
                f"Setting 'column_map' has been changed to 'rename_columns'. "
                f"Support for 'column_map' in annotate_tables  will be removed in future versions.",
                FutureWarning)

            logger.info(
                f"{trace_label} - renaming {tablename} columns {column_map}")
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info(
                f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}"
            )
            expressions.assign_columns(df=df,
                                       model_settings=annotate,
                                       trace_label=trace_label)

        chunk.log_df(trace_label, tablename, df)

        # - write table to pipeline
        pipeline.replace_table(tablename, df)

        del df
        chunk.log_df(trace_label, tablename, None)
Example #10
def compute_utilities_for_atttribute_tuple(network_los, scalar_attributes,
                                           data, chunk_size, trace_label):

    # scalar_attributes is a dict of attribute name/value pairs for this combination
    # (e.g. {'demographic_segment': 0, 'tod': 'AM', 'access_mode': 'walk'})

    logger.info(f"{trace_label} scalar_attributes: {scalar_attributes}")

    uid_calculator = network_los.tvpb.uid_calculator

    attributes_as_columns = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])
    model_settings = \
        network_los.setting(f'TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')
    model_constants = \
        network_los.setting(f'TVPB_SETTINGS.tour_mode_choice.CONSTANTS').copy()
    model_constants.update(scalar_attributes)

    data = data.reshape(uid_calculator.fully_populated_shape)

    # get od skim_offset dataframe with uid index corresponding to scalar_attributes
    choosers_df = uid_calculator.get_od_dataframe(scalar_attributes)

    row_size = chunk_size and initialize_tvpb_calc_row_size(
        choosers_df, network_los, trace_label)
    for i, chooser_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers(choosers_df, chunk_size, row_size, trace_label):

        # we should count choosers_df as chunk overhead since it's pretty big and was custom-made for compute_utilities
        # (call log_df from inside yield loop so it is visible to adaptive_chunked_choosers chunk_log)
        chunk.log_df(trace_label, 'choosers_df', choosers_df)

        # add any attribute columns specified as column attributes in settings (the rest will be scalars in locals_dict)
        for attribute_name in attributes_as_columns:
            chooser_chunk[attribute_name] = scalar_attributes[attribute_name]

        chunk.log_df(trace_label, 'chooser_chunk', chooser_chunk)

        utilities_df = \
            pathbuilder.compute_utilities(network_los,
                                          model_settings=model_settings,
                                          choosers=chooser_chunk,
                                          model_constants=model_constants,
                                          trace_label=trace_label)

        chunk.log_df(trace_label, 'utilities_df', utilities_df)

        assert len(utilities_df) == len(chooser_chunk)
        assert len(utilities_df.columns) == data.shape[1]
        assert not any_uninitialized(utilities_df.values)

        data[chooser_chunk.index.values, :] = utilities_df.values

    logger.debug(f"{trace_label} updated utilities")
Example #11
    def compute_maz_tap_utilities(self, recipe, maz_od_df, chooser_attributes,
                                  leg, mode, trace_label, trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 f'maz_tap_utils.{leg}')

        with chunk.chunk_log(trace_label):

            maz_tap_settings = \
                self.network_los.setting(f'TVPB_SETTINGS.{recipe}.maz_tap_settings.{mode}')
            chooser_columns = maz_tap_settings['CHOOSER_COLUMNS']
            attribute_columns = list(
                chooser_attributes.columns
            ) if chooser_attributes is not None else []
            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')

            if leg == 'access':
                maz_col = 'omaz'
                tap_col = 'btap'
            else:
                maz_col = 'dmaz'
                tap_col = 'atap'

            # maz_to_tap access/egress utilities
            # deduped utilities_df - one row per chooser for each boarding tap (btap) accessible from omaz
            utilities_df = self.network_los.maz_to_tap_dfs[mode]

            utilities_df = utilities_df[chooser_columns]. \
                reset_index(drop=False). \
                rename(columns={'MAZ': maz_col, 'TAP': tap_col})
            utilities_df = pd.merge(maz_od_df[['idx',
                                               maz_col]].drop_duplicates(),
                                    utilities_df,
                                    on=maz_col,
                                    how='inner')
            # add any supplemental chooser attributes (e.g. demographic_segment, tod)
            for c in attribute_columns:
                utilities_df[c] = reindex(chooser_attributes[c],
                                          utilities_df['idx'])

            chunk.log_df(trace_label, "utilities_df", utilities_df)

            if self.units_for_recipe(recipe) == 'utility':

                utilities_df[leg] = compute_utilities(
                    self.network_los,
                    maz_tap_settings,
                    utilities_df,
                    model_constants=model_constants,
                    trace_label=trace_label,
                    trace=trace,
                    trace_column_names=['idx', maz_col, tap_col]
                    if trace else None)

                chunk.log_df(trace_label, "utilities_df",
                             utilities_df)  # annotated

            else:

                assignment_spec = \
                    assign.read_assignment_spec(file_name=config.config_file_path(maz_tap_settings['SPEC']))

                results, _, _ = assign.assign_variables(
                    assignment_spec, utilities_df, model_constants)
                assert len(results.columns) == 1
                utilities_df[leg] = results

            chunk.log_df(trace_label, "utilities_df", utilities_df)

            if trace:
                self.trace_df(utilities_df, trace_label, 'utilities_df')

            # drop utility computation columns ('tod', 'demographic_segment' and maz_to_tap_df time/distance columns)
            utilities_df.drop(columns=attribute_columns + chooser_columns,
                              inplace=True)

        return utilities_df
Example #12
def run_trip_purpose(trips_df, estimator, chunk_size, trace_hh_id,
                     trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    Each intermediate stop on a tour (i.e. each trip other than the last trip outbound or inbound)
    is assigned a purpose based on an observed frequency distribution.

    The distribution should always be segmented by tour purpose and tour direction. By default it is also
    segmented by person type. The join columns can be overwritten using the "probs_join_cols" parameter in
    the model settings. The model will attempt to segment by trip depart time as well if necessary
    and depart time ranges are specified in the probability lookup table.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    # uniform across trip_purpose
    chunk_tag = 'trip_purpose'

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    probs_join_cols = model_settings.get('probs_join_cols', PROBS_JOIN_COLUMNS)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name),
                             comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # estimator.write_coefficients(coefficients_df, model_settings)

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'work', 'home'),
                        index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(df=trips_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    use_depart_time = model_settings.get('use_depart_time', True)

    for i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(trips_df, chunk_size, chunk_tag, trace_label):
        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            estimator,
            probs_join_cols=probs_join_cols,
            use_depart_time=use_depart_time,
            trace_hh_id=trace_hh_id,
            trace_label=chunk_trace_label)

        result_list.append(choices)

        chunk.log_df(trace_label, f'result_list', result_list)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
Example #13
def choose_intermediate_trip_purpose(trips, probs_spec, estimator,
                                     probs_join_cols, use_depart_time,
                                     trace_hh_id, trace_label):
    """
    choose purpose for intermediate trips based on probs_spec
    which assigns relative weights (summing to 1) to the possible purpose choices

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    non_purpose_cols = probs_join_cols.copy()
    if use_depart_time:
        non_purpose_cols += ['depart_range_start', 'depart_range_end']
    purpose_cols = [c for c in probs_spec.columns if c not in non_purpose_cols]

    num_trips = len(trips.index)
    have_trace_targets = trace_hh_id and tracing.has_trace_targets(trips)

    # probs should sum to 1 across rows
    sum_probs = probs_spec[purpose_cols].sum(axis=1)
    probs_spec.loc[:,
                   purpose_cols] = probs_spec.loc[:,
                                                  purpose_cols].div(sum_probs,
                                                                    axis=0)

    # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
    choosers = pd.merge(trips.reset_index(),
                        probs_spec,
                        on=probs_join_cols,
                        how='left').set_index('trip_id')
    chunk.log_df(trace_label, 'choosers', choosers)

    if use_depart_time:

        # select the matching depart range (this should result in exactly one chooser row per trip)
        chooser_probs = \
            (choosers.start >= choosers['depart_range_start']) & (choosers.start <= choosers['depart_range_end'])

        # if we failed to match a row in probs_spec
        if chooser_probs.sum() < num_trips:

            # this can happen if the spec doesn't have probs for the trips matching a trip's probs_join_cols
            missing_trip_ids = trips.index[
                ~trips.index.isin(choosers.index[chooser_probs])].values
            unmatched_choosers = choosers[choosers.index.isin(
                missing_trip_ids)]
            unmatched_choosers = unmatched_choosers[['person_id', 'start'] +
                                                    non_purpose_cols]

            # join to persons for better diagnostics
            persons = inject.get_table('persons').to_frame()
            persons_cols = [
                'age', 'is_worker', 'is_student', 'is_gradeschool',
                'is_highschool', 'is_university'
            ]
            unmatched_choosers = pd.merge(unmatched_choosers,
                                          persons[[
                                              col for col in persons_cols
                                              if col in persons.columns
                                          ]],
                                          left_on='person_id',
                                          right_index=True,
                                          how='left')

            file_name = '%s.UNMATCHED_PROBS' % trace_label
            logger.error(
                "%s %s of %s intermediate trips could not be matched to probs based on join columns  %s"
                % (trace_label, len(unmatched_choosers), len(choosers),
                   probs_join_cols))
            logger.info("Writing %s unmatched choosers to %s" % (
                len(unmatched_choosers),
                file_name,
            ))
            tracing.write_csv(unmatched_choosers,
                              file_name=file_name,
                              transpose=False)
            raise RuntimeError(
                "Some trips could not be matched to probs based on join columns %s."
                % probs_join_cols)

        # select the matching depart range (this should result in exactly one chooser row per trip)
        choosers = choosers[chooser_probs]

    # choosers should now match trips row for row
    assert choosers.index.identical(trips.index)

    if estimator:
        probs_cols = list(probs_spec.columns)
        print(choosers[probs_cols])
        estimator.write_table(choosers[probs_cols], 'probs', append=True)

    choices, rands = logit.make_choices(choosers[purpose_cols],
                                        trace_label=trace_label,
                                        trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(choices,
                         '%s.choices' % trace_label,
                         columns=[None, 'trip_purpose'])
        tracing.trace_df(rands,
                         '%s.rands' % trace_label,
                         columns=[None, 'rand'])

    choices = choices.map(pd.Series(purpose_cols))
    return choices
Example #14
def schedule_trips_in_leg(outbound, trips, probs_spec, model_settings,
                          is_last_iteration, trace_hh_id, trace_label):
    """

    Parameters
    ----------
    outbound
    trips
    probs_spec
    depart_alt_base
    is_last_iteration
    trace_hh_id
    trace_label

    Returns
    -------
    choices: pd.Series
        depart choice for trips, indexed by trip_id
    """

    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    # logger.debug("%s scheduling %s trips" % (trace_label, trips.shape[0]))

    assert len(trips) > 0

    assert (trips.outbound == outbound).all()

    # initial trip of leg and all atwork trips get tour_hour
    is_initial = (trips.trip_num == 1) if outbound else (trips.trip_num
                                                         == trips.trip_count)
    no_scheduling = is_initial | (trips.primary_purpose == 'atwork')
    choices = trips.tour_hour[no_scheduling]

    if no_scheduling.all():
        return choices

    result_list = []
    result_list.append(choices)
    trips = trips[~no_scheduling]

    # add next_trip_id temp column (temp, since trips is now a copy as a result of slicing)
    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1 if outbound else 1)
    is_final = (trips.trip_num
                == trips.trip_count) if outbound else (trips.trip_num == 1)
    trips.next_trip_id = trips.next_trip_id.where(~is_final, NO_TRIP_ID)

    # iterate over outbound trips in ascending trip_num order, skipping the initial trip
    # iterate over inbound trips in descending trip_num order, skipping the final trip
    first_trip_in_leg = True
    for i in range(trips.trip_num.min(), trips.trip_num.max() + 1):

        if outbound:
            nth_trips = trips[trips.trip_num == i]
        else:
            nth_trips = trips[trips.trip_num == trips.trip_count - i]

        nth_trace_label = tracing.extend_trace_label(trace_label, 'num_%s' % i)

        choices = schedule_nth_trips(nth_trips,
                                     probs_spec,
                                     model_settings,
                                     first_trip_in_leg=first_trip_in_leg,
                                     report_failed_trips=is_last_iteration,
                                     trace_hh_id=trace_hh_id,
                                     trace_label=nth_trace_label)

        # if outbound, this trip's depart constrains next trip's earliest depart option
        # if inbound, we are handling in reverse order, so it constrains latest depart instead
        ADJUST_NEXT_DEPART_COL = 'earliest' if outbound else 'latest'

        # choose most initial departure (when no choice was made because all probs were zero)
        if is_last_iteration and (failfix == FAILFIX_CHOOSE_MOST_INITIAL):
            choices = choices.reindex(nth_trips.index)
            logger.warning("%s coercing %s depart choices to most initial" %
                           (nth_trace_label, choices.isna().sum()))
            choices = choices.fillna(trips[ADJUST_NEXT_DEPART_COL])

        # adjust allowed depart range of next trip
        has_next_trip = (nth_trips.next_trip_id != NO_TRIP_ID)
        if has_next_trip.any():
            next_trip_ids = nth_trips.next_trip_id[has_next_trip]
            # patch choices for any trips with next_trips that weren't scheduled
            trips.loc[next_trip_ids, ADJUST_NEXT_DEPART_COL] = \
                choices.reindex(next_trip_ids.index).fillna(trips[ADJUST_NEXT_DEPART_COL]).values

        result_list.append(choices)

        chunk.log_df(trace_label, f'result_list', result_list)

        first_trip_in_leg = False

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
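
The next_trip_id bookkeeping relies on np.roll over the sorted trip index, with the wrapped-around id on the last trip of the leg masked out; a toy outbound example (the NO_TRIP_ID sentinel value is assumed):

import numpy as np
import pandas as pd

NO_TRIP_ID = 0   # sentinel for "no successor" (assumed value)

trips = pd.DataFrame({'trip_num': [2, 3, 4], 'trip_count': [4, 4, 4]},
                     index=pd.Index([11, 12, 13], name='trip_id')).sort_index()

# outbound: each trip's successor is the next row; roll(-1) wraps the last row around
trips['next_trip_id'] = np.roll(trips.index, -1)
is_final = trips.trip_num == trips.trip_count

# blank out the wrapped-around id on the final trip of the leg
trips['next_trip_id'] = trips.next_trip_id.where(~is_final, NO_TRIP_ID)
# next_trip_id is now [12, 13, NO_TRIP_ID]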
Example #15
def run_trip_scheduling_choice(spec, tours, skims, locals_dict, chunk_size,
                               trace_hh_id, trace_label):

    NUM_TOUR_LEGS = 3
    trace_label = tracing.extend_trace_label(trace_label,
                                             'interaction_sample_simulate')

    # FIXME: The duration, start, and end should be ints well before we get here...
    tours[TOUR_DURATION_COLUMN] = tours[TOUR_DURATION_COLUMN].astype(np.int8)

    # Setup boolean columns to make it easier to identify
    # intermediate stops later in the model.
    tours[HAS_OB_STOPS] = tours[NUM_OB_STOPS] >= 1
    tours[HAS_IB_STOPS] = tours[NUM_IB_STOPS] >= 1

    # Calculate a matrix with the appropriate alternative sizes
    # based on the total tour duration. This is used to calculate
    # chunk sizes.
    max_duration = tours[TOUR_DURATION_COLUMN].max()
    alt_sizes = generate_alternative_sizes(max_duration, NUM_TOUR_LEGS)

    # Assert the number of tour leg schedule alternatives for each tour
    tours[NUM_ALTERNATIVES] = 1
    tours.loc[tours[HAS_OB_STOPS] != tours[HAS_IB_STOPS],
              NUM_ALTERNATIVES] = tours[TOUR_DURATION_COLUMN] + 1
    tours.loc[tours[HAS_OB_STOPS] & tours[HAS_IB_STOPS], NUM_ALTERNATIVES] = \
        tours.apply(lambda x: alt_sizes[1, x.duration], axis=1)

    # If no intermediate stops on the tour, then the main leg duration
    # equals the tour duration and the intermediate durations are zero
    tours.loc[~tours[HAS_OB_STOPS] & ~tours[HAS_IB_STOPS],
              MAIN_LEG_DURATION] = tours[TOUR_DURATION_COLUMN]
    tours.loc[~tours[HAS_OB_STOPS] & ~tours[HAS_IB_STOPS],
              [IB_DURATION, OB_DURATION]] = 0

    # We only need to determine schedules for tours with intermediate stops
    indirect_tours = tours.loc[tours[HAS_OB_STOPS] | tours[HAS_IB_STOPS]]

    if len(indirect_tours) > 0:

        # Iterate through the chunks
        result_list = []
        for i, choosers, chunk_trace_label in \
                chunk.adaptive_chunked_choosers(indirect_tours, chunk_size, trace_label):

            # Sort the choosers and get the schedule alternatives
            choosers = choosers.sort_index()
            schedules = generate_schedule_alternatives(choosers).sort_index()

            # Assuming we did the max_alt_size calculation correctly,
            # we should get the same sizes here.
            assert choosers[NUM_ALTERNATIVES].sum() == schedules.shape[0]

            # Run the simulation
            choices = _interaction_sample_simulate(
                choosers=choosers,
                alternatives=schedules,
                spec=spec,
                choice_column=SCHEDULE_ID,
                allow_zero_probs=True,
                zero_prob_choice_val=-999,
                log_alt_losers=False,
                want_logsums=False,
                skims=skims,
                locals_d=locals_dict,
                trace_label=chunk_trace_label,
                trace_choice_name='trip_schedule_stage_1',
                estimator=None)

            assert len(choices.index) == len(choosers.index)

            choices = schedules[schedules[SCHEDULE_ID].isin(choices)]

            result_list.append(choices)

            chunk.log_df(trace_label, f'result_list', result_list)

        # FIXME: this will require 2X RAM
        # if necessary, could append to hdf5 store on disk:
        # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
        if len(result_list) > 1:
            choices = pd.concat(result_list)

        assert len(choices.index) == len(indirect_tours.index)

        # The choices here are only the indirect tours, so the durations
        # need to be updated on the main tour dataframe.
        tours.update(choices[[MAIN_LEG_DURATION, OB_DURATION, IB_DURATION]])

    # Cleanup data types and drop temporary columns
    tours[[MAIN_LEG_DURATION, OB_DURATION, IB_DURATION]] = \
        tours[[MAIN_LEG_DURATION, OB_DURATION, IB_DURATION]].astype(np.int8)
    tours = tours.drop(columns=TEMP_COLS)

    return tours
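
Only tours with intermediate stops are re-scheduled; their durations flow back into the full tours table through DataFrame.update, which overwrites matching index/column cells in place. A small pandas illustration (toy columns):

import pandas as pd

tours = pd.DataFrame({'main_leg_duration': [5, 0, 7], 'ob_duration': [0, 0, 0]},
                     index=[1, 2, 3])

# choices only cover the subset of tours that needed scheduling (tour 2 here)
choices = pd.DataFrame({'main_leg_duration': [3], 'ob_duration': [2]}, index=[2])

tours.update(choices)   # aligned on index and columns; rows 1 and 3 are untouched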
Example #16
def _run_cdap(
        persons,
        cdap_indiv_spec,
        interaction_coefficients,
        cdap_fixed_relative_proportions,
        locals_d,
        trace_hh_id, trace_label):
    """
    Implements core run_cdap functionality on persons df (or chunked subset thereof)
    Aside from chunking of persons df, params are passed through from run_cdap unchanged
    """

    # assign integer cdap_rank to each household member
    # persons with cdap_rank 1..MAX_HHSIZE will have their activities chosen by the CDAP model
    # extra household members will have activities assigned in fixed proportions
    assign_cdap_rank(persons, trace_hh_id, trace_label)

    # Calculate CDAP utilities for each individual, ignoring interactions
    # ind_utils has index of 'person_id' and a column for each alternative
    # i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)
    indiv_utils = individual_utilities(persons[persons.cdap_rank <= MAX_HHSIZE],
                                       cdap_indiv_spec, locals_d,
                                       trace_hh_id, trace_label)

    # compute interaction utilities, probabilities, and hh activity pattern choices
    # for each size household separately in turn up to MAX_HHSIZE
    hh_choices_list = []
    for hhsize in range(1, MAX_HHSIZE+1):

        choices = household_activity_choices(
            indiv_utils, interaction_coefficients, hhsize=hhsize,
            trace_hh_id=trace_hh_id, trace_label=trace_label)

        hh_choices_list.append(choices)

    del indiv_utils

    # concat all the household choices into a single series indexed on _hh_index_
    hh_activity_choices = pd.concat(hh_choices_list)

    # unpack the household activity choice list into choices for each (non-extra) household member
    # resulting series contains one activity per individual hh member, indexed on _persons_index_
    cdap_person_choices \
        = unpack_cdap_indiv_activity_choices(persons, hh_activity_choices,
                                             trace_hh_id, trace_label)

    # assign activities to extra household members (with cdap_rank > MAX_HHSIZE)
    # resulting series contains one activity per individual hh member, indexed on _persons_index_
    extra_person_choices \
        = extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                                  trace_hh_id, trace_label)

    # concat cdap and extra person choices into a single series
    # this series will be the same length as the persons dataframe and be indexed on _persons_index_

    person_choices = pd.concat([cdap_person_choices, extra_person_choices])

    persons['cdap_activity'] = person_choices

    cdap_results = persons[['cdap_rank', 'cdap_activity']]

    # if DUMP:
    #     tracing.trace_df(hh_activity_choices, '%s.DUMP.hh_activity_choices' % trace_label,
    #                      transpose=False, slicer='NONE')
    #     tracing.trace_df(cdap_results, '%s.DUMP.cdap_results' % trace_label,
    #                      transpose=False, slicer='NONE')

    chunk.log_df(trace_label, 'persons', persons)

    # return dataframe with two columns
    return cdap_results
Example #17
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, path_info, trace_label,
                             trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        with chunk.chunk_log(trace_label):

            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')
            tap_tap_settings = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

            with memo(
                    "#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df, egress_df,
                                                    chooser_attributes,
                                                    trace_label, trace)
                # note: transit_df index is arbitrary
                chunk.log_df(trace_label, "transit_df", transit_df)

            # some expressions may want to know access mode -
            locals_dict = path_info.copy()
            locals_dict['los'] = self.network_los
            locals_dict.update(model_constants)

            assignment_spec = assign.read_assignment_spec(
                file_name=config.config_file_path(tap_tap_settings['SPEC']))

            DEDUPE = True
            if DEDUPE:

                # assign uid for reduping
                max_atap = transit_df.atap.max() + 1
                transit_df[
                    'uid'] = transit_df.btap * max_atap + transit_df.atap

                # dedupe
                chooser_attribute_columns = list(chooser_attributes.columns)
                unique_transit_df = \
                    transit_df.loc[~transit_df.uid.duplicated(), ['btap', 'atap', 'uid'] + chooser_attribute_columns]
                unique_transit_df.set_index('uid', inplace=True)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)

                logger.debug(
                    f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}"
                )

                # assign_variables
                results, _, _ = assign.assign_variables(
                    assignment_spec, unique_transit_df, locals_dict)
                assert len(results.columns) == 1
                unique_transit_df['transit'] = results

                # redupe results back into transit_df
                with memo("#TVPB compute_tap_tap_time redupe transit_df"):
                    transit_df['transit'] = reindex(unique_transit_df.transit,
                                                    transit_df.uid)

                del transit_df['uid']
                del unique_transit_df
                chunk.log_df(trace_label, "transit_df", transit_df)
                chunk.log_df(trace_label, "unique_transit_df", None)

            else:
                results, _, _ = assign.assign_variables(
                    assignment_spec, transit_df, locals_dict)
                assert len(results.columns) == 1
                transit_df['transit'] = results

            # filter out unavailable btap_atap pairs
            logger.debug(
                f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
            )
            transit_df = transit_df[transit_df.transit > 0]

            transit_df.drop(columns=chooser_attributes.columns, inplace=True)

            chunk.log_df(trace_label, "transit_df", None)

            if trace:
                self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
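
The dedupe/redupe trick above runs the expensive assignment only once per unique (btap, atap, attributes) combination and broadcasts the result back through a uid key; a compact sketch of the same idea on toy data (Series.map stands in for the reindex helper, and the arithmetic stands in for assign_variables):

import pandas as pd

transit_df = pd.DataFrame({'btap': [10, 10, 20, 10],
                           'atap': [1, 2, 1, 1]})    # note: (10, 1) occurs twice

# build a unique id per (btap, atap) pair
max_atap = transit_df.atap.max() + 1
transit_df['uid'] = transit_df.btap * max_atap + transit_df.atap

# run the expensive computation on unique pairs only
unique_df = transit_df.loc[~transit_df.uid.duplicated()].set_index('uid').copy()
unique_df['transit'] = unique_df.btap * 0.1 + unique_df.atap   # stand-in for assign_variables

# broadcast the per-uid results back onto the full (duplicated) table
transit_df['transit'] = transit_df.uid.map(unique_df.transit)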
Example #18
def _run_cdap(
        persons,
        cdap_indiv_spec,
        interaction_coefficients,
        cdap_fixed_relative_proportions,
        locals_d,
        trace_hh_id, trace_label):
    """
    Implements core run_cdap functionality on persons df (or chunked subset thereof)
    Aside from chunking of persons df, params are passed through from run_cdap unchanged

    Returns pandas Dataframe with two columns:
        cdap_activity : str
            activity for that person expressed as 'M', 'N', 'H'
        cdap_rank : int
            activities for persons with cdap_rank <= MAX_HHSIZE are determined by cdap
            'extra' household members activities are assigned by cdap_fixed_relative_proportions
    """

    # assign integer cdap_rank to each household member
    # persons with cdap_rank 1..MAX_HHSIZE will have their activities chosen by the CDAP model
    # extra household members will have activities assigned in fixed proportions
    assign_cdap_rank(persons, trace_hh_id, trace_label)

    # Calculate CDAP utilities for each individual, ignoring interactions
    # ind_utils has index of 'person_id' and a column for each alternative
    # i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)
    indiv_utils = individual_utilities(persons[persons.cdap_rank <= MAX_HHSIZE],
                                       cdap_indiv_spec, locals_d,
                                       trace_hh_id, trace_label)

    # compute interaction utilities, probabilities, and hh activity pattern choices
    # for each size household separately in turn up to MAX_HHSIZE
    hh_choices_list = []
    for hhsize in range(1, MAX_HHSIZE+1):

        choices = household_activity_choices(
            indiv_utils, interaction_coefficients, hhsize=hhsize,
            trace_hh_id=trace_hh_id, trace_label=trace_label)

        hh_choices_list.append(choices)

    del indiv_utils

    # concat all the household choices into a single series indexed on _hh_index_
    hh_activity_choices = pd.concat(hh_choices_list)

    # unpack the household activity choice list into choices for each (non-extra) household member
    # resulting series contains one activity per individual hh member, indexed on _persons_index_
    cdap_person_choices \
        = unpack_cdap_indiv_activity_choices(persons, hh_activity_choices,
                                             trace_hh_id, trace_label)

    # assign activities to extra household members (with cdap_rank > MAX_HHSIZE)
    # resulting series contains one activity per individual hh member, indexed on _persons_index_
    extra_person_choices \
        = extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                                  trace_hh_id, trace_label)

    # concat cdap and extra person choices into a single series
    # this series will be the same length as the persons dataframe and be indexed on _persons_index_

    person_choices = pd.concat([cdap_person_choices, extra_person_choices])

    persons['cdap_activity'] = person_choices

    # if DUMP:
    #     tracing.trace_df(hh_activity_choices, '%s.DUMP.hh_activity_choices' % trace_label,
    #                      transpose=False, slicer='NONE')
    #     tracing.trace_df(cdap_results, '%s.DUMP.cdap_results' % trace_label,
    #                      transpose=False, slicer='NONE')

    chunk.log_df(trace_label, 'persons', persons)

    return persons[['cdap_rank', 'cdap_activity']]
Example #19
def tdd_interaction_dataset(tours, alts, timetable, choice_column,
                            window_id_col, trace_label):
    """
    interaction_sample_simulate expects
    alts index same as choosers (e.g. tour_id)
    name of choice column in alts

    Parameters
    ----------
    tours : pandas DataFrame
        must have person_id column and index on tour_id
    alts : pandas DataFrame
        alts index must be timetable tdd id
    timetable : TimeTable object
    choice_column : str
        name of column to store alt index in alt_tdd DataFrame
        (since alt_tdd is duplicate index on person_id but unique on person_id,alt_id)

    Returns
    -------
    alt_tdd : pandas DataFrame
        columns: start, end, duration, <choice_column>
        index: tour_id


    """

    trace_label = tracing.extend_trace_label(trace_label,
                                             'tdd_interaction_dataset')

    with chunk.chunk_log(trace_label):
        alts_ids = np.tile(alts.index, len(tours.index))
        chunk.log_df(trace_label, 'alts_ids', alts_ids)

        tour_ids = np.repeat(tours.index, len(alts.index))
        window_row_ids = np.repeat(tours[window_id_col], len(alts.index))

        alt_tdd = alts.take(alts_ids)

        alt_tdd.index = tour_ids
        alt_tdd[window_id_col] = window_row_ids

        # add tdd alternative id
        # by convention, the choice column is the first column in the interaction dataset
        alt_tdd.insert(loc=0, column=choice_column, value=alts_ids)

        # slice out all non-available tours
        available = timetable.tour_available(alt_tdd[window_id_col],
                                             alt_tdd[choice_column])
        logger.debug(
            f"tdd_interaction_dataset keeping {available.sum()} of ({len(available)}) available alt_tdds"
        )
        assert available.any()

        chunk.log_df(trace_label, 'alt_tdd',
                     alt_tdd)  # catch this before we slice on available

        alt_tdd = alt_tdd[available]

        chunk.log_df(trace_label, 'alt_tdd', alt_tdd)

        # FIXME - don't need this any more after slicing
        del alt_tdd[window_id_col]

    return alt_tdd
Example #20
def schedule_trips_in_leg(outbound, trips, probs_spec, model_settings,
                          is_last_iteration, trace_hh_id, trace_label):
    """

    Parameters
    ----------
    outbound
    trips
    probs_spec
    depart_alt_base
    is_last_iteration
    trace_hh_id
    trace_label

    Returns
    -------
    choices: pd.Series
        depart choice for trips, indexed by trip_id
    """

    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)
    depart_alt_base = model_settings.get('DEPART_ALT_BASE', 0)
    scheduling_mode = model_settings.get('scheduling_mode', 'departure')

    if scheduling_mode == 'departure':
        probs_join_cols = model_settings.get(
            'probs_join_cols', PROBS_JOIN_COLUMNS_DEPARTURE_BASED)
    elif scheduling_mode == 'stop_duration':
        probs_join_cols = model_settings.get(
            'probs_join_cols', PROBS_JOIN_COLUMNS_DURATION_BASED)
    else:
        logger.error(
            "Invalid scheduling mode specified: {0}. "
            "Please select one of ['departure', 'stop_duration'] and try again.".format(scheduling_mode))

    # logger.debug("%s scheduling %s trips" % (trace_label, trips.shape[0]))

    assert len(trips) > 0
    assert (trips.outbound == outbound).all()

    result_list = []

    # trips to/from tour origin or atwork get tour_hour departure times
    # no need to schedule them if there are no intermediate stops
    to_from_tour_orig = (trips.trip_num == 1) if outbound else (
        trips.trip_num == trips.trip_count)
    do_not_schedule = to_from_tour_orig | (trips.primary_purpose == 'atwork')
    choices = trips.tour_hour[do_not_schedule]

    if do_not_schedule.all():
        return choices

    result_list.append(choices)
    trips = trips[~do_not_schedule]

    # add next_trip_id temp column, and specify departure constraint column to update
    trips = trips.sort_index()
    if outbound or scheduling_mode == DURATION_MODE:
        trips['next_trip_id'] = np.roll(trips.index, -1)
        is_final = trips.trip_num == trips.trip_count
        # each trip's depart constrains next trip's earliest depart option
        ADJUST_NEXT_DEPART_COL = 'earliest'
    else:
        trips['next_trip_id'] = np.roll(trips.index, 1)
        is_final = trips.trip_num == 1
        # if inbound, we are handling in reverse order, so each choice
        # constrains latest depart of the preceding trip
        ADJUST_NEXT_DEPART_COL = 'latest'
    trips.next_trip_id = trips.next_trip_id.where(~is_final, NO_TRIP_ID)

    first_trip_in_leg = True
    for i in range(trips.trip_num.min(), trips.trip_num.max() + 1):

        if outbound or scheduling_mode == DURATION_MODE:
            # iterate in ascending trip_num order
            nth_trips = trips[trips.trip_num == i]
        else:
            # iterate over inbound trips in descending trip_num order, skipping the final trip
            nth_trips = trips[trips.trip_num == trips.trip_count - i]

        nth_trace_label = tracing.extend_trace_label(trace_label, 'num_%s' % i)

        choices = ps.make_scheduling_choices(
            nth_trips,
            scheduling_mode,
            probs_spec,
            probs_join_cols,
            depart_alt_base,
            first_trip_in_leg=first_trip_in_leg,
            report_failed_trips=is_last_iteration,
            trace_hh_id=trace_hh_id,
            trace_label=nth_trace_label)

        # most initial departure (when no choice was made because all probs were zero)
        if is_last_iteration and (failfix == FAILFIX_CHOOSE_MOST_INITIAL):
            choices = choices.reindex(nth_trips.index)
            logger.warning("%s coercing %s depart choices to most initial" %
                           (nth_trace_label, choices.isna().sum()))
            choices = choices.fillna(trips[ADJUST_NEXT_DEPART_COL])

        # adjust allowed depart range of next trip
        has_next_trip = (nth_trips.next_trip_id != NO_TRIP_ID)
        if has_next_trip.any():
            next_trip_ids = nth_trips.next_trip_id[has_next_trip]
            # if the current trip's depart choice failed, fall back to the existing constraint value
            trips.loc[next_trip_ids, ADJUST_NEXT_DEPART_COL] = \
                choices.reindex(next_trip_ids.index).fillna(trips[ADJUST_NEXT_DEPART_COL]).values

        result_list.append(choices)

        chunk.log_df(trace_label, 'result_list', result_list)

        first_trip_in_leg = False

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
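
# --- Illustrative sketch (not ActivitySim code) of the constraint propagation used above:
# --- once a trip's depart hour is chosen, the next trip on an outbound leg may not depart
# --- earlier than that hour. All names here (propagate_earliest, next_trip_id == -1) are hypothetical.
import pandas as pd

def propagate_earliest(trips, chosen_departs):
    """Tighten the 'earliest' bound of each scheduled trip's successor on the leg."""
    trips = trips.copy()
    for trip_id, depart in chosen_departs.items():
        next_id = trips.at[trip_id, 'next_trip_id']
        if next_id != -1:  # -1 plays the role of NO_TRIP_ID in this sketch
            trips.at[next_id, 'earliest'] = max(trips.at[next_id, 'earliest'], depart)
    return trips

trips = pd.DataFrame(
    {'next_trip_id': [2, 3, -1], 'earliest': [5, 5, 5], 'latest': [23, 23, 23]},
    index=pd.Index([1, 2, 3], name='trip_id'))
trips = propagate_earliest(trips, pd.Series({1: 8}))  # trip 1 departs at 8 ...
print(trips.at[2, 'earliest'])                        # ... so trip 2 may not depart before 8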
Example #21
    def compute_tap_tap_utilities(self, recipe, access_df, egress_df,
                                  chooser_attributes, path_info, trace_label,
                                  trace):
        """
        create transit_df and compute utilities for all atap-btap pairs between the omaz values in access_df
        and the dmaz values in egress_df,
        computing the utilities with the tap_tap utility expressions file specified in tap_tap_settings

        transit_df contains all possible access omaz/btap to egress dmaz/atap transit path pairs for each chooser

        trace should be True, as dynamic utility computation is neither encouraged nor supported
        except when tracing (precomputing is fairly fast)

        Parameters
        ----------
        recipe: str
           'recipe' key in network_los.yaml TVPB_SETTINGS e.g. tour_mode_choice
        access_df: pandas.DataFrame
            dataframe with 'idx' and 'omaz' columns
        egress_df: pandas.DataFrame
            dataframe with 'idx' and 'dmaz' columns
        chooser_attributes: dict
        path_info
        trace_label: str
        trace: boolean

        Returns
        -------
        transit_df: pandas.DataFrame
        """

        assert trace

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_utils')

        with chunk.chunk_log(trace_label):

            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')
            tap_tap_settings = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

            with memo(
                    "#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df, egress_df,
                                                    chooser_attributes,
                                                    trace_label, trace)
                # note: transit_df index is arbitrary
            chunk.log_df(trace_label, "transit_df", transit_df)

            # FIXME some expressions may want to know access mode -
            locals_dict = path_info.copy()
            locals_dict.update(model_constants)

            # columns needed for compute_utilities
            chooser_columns = ['btap', 'atap'] + list(
                chooser_attributes.columns)

            # deduplicate transit_df to unique_transit_df
            with memo(
                    "#TVPB compute_tap_tap_utilities deduplicate transit_df"):

                attribute_segments = \
                    self.network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attribute_segments')
                scalar_attributes = {
                    k: locals_dict[k]
                    for k in attribute_segments.keys() if k not in transit_df
                }

                transit_df['uid'] = self.uid_calculator.get_unique_ids(
                    transit_df, scalar_attributes)

                unique_transit_df = transit_df.loc[
                    ~transit_df.uid.duplicated(), chooser_columns + ['uid']]
                logger.debug(
                    f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}"
                )

                unique_transit_df.set_index('uid', inplace=True)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)

                transit_df = transit_df[['idx', 'btap', 'atap',
                                         'uid']]  # don't need chooser columns
                chunk.log_df(trace_label, "transit_df", transit_df)

            logger.debug(
                f"#TVPB CACHE compute_tap_tap_utilities dedupe transit_df "
                f"from {len(transit_df)} to {len(unique_transit_df)} rows")

            num_unique_transit_rows = len(unique_transit_df)  # errcheck
            logger.debug(
                f"#TVPB CACHE compute_tap_tap_utilities compute_utilities for {len(unique_transit_df)} rows"
            )

            with memo("#TVPB compute_tap_tap_utilities compute_utilities"):
                unique_utilities_df = compute_utilities(
                    self.network_los,
                    tap_tap_settings,
                    choosers=unique_transit_df,
                    model_constants=locals_dict,
                    trace_label=trace_label,
                    trace=trace,
                    trace_column_names=chooser_columns if trace else None)
                chunk.log_df(trace_label, "unique_utilities_df",
                             unique_utilities_df)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)  # annotated

                if trace:
                    # combine unique_transit_df with unique_utilities_df for legibility
                    omnibus_df = pd.merge(unique_transit_df,
                                          unique_utilities_df,
                                          left_index=True,
                                          right_index=True,
                                          how='left')
                    self.trace_df(omnibus_df, trace_label,
                                  'unique_utilities_df')
                    chunk.log_df(trace_label, "omnibus_df", omnibus_df)
                    del omnibus_df
                    chunk.log_df(trace_label, "omnibus_df", None)

            assert num_unique_transit_rows == len(
                unique_utilities_df)  # errcheck

            # redupe unique_transit_df back into transit_df
            with memo("#TVPB compute_tap_tap_utilities redupe transit_df"):

                # idx = transit_df.index
                transit_df = pd.merge(transit_df,
                                      unique_utilities_df,
                                      left_on='uid',
                                      right_index=True)
                del transit_df['uid']
                # transit_df.index = idx
                # note: left merge on columns does not preserve index,
                # but transit_df index is arbitrary so no need to restore

                chunk.log_df(trace_label, "transit_df", transit_df)

            if ERR_CHECK:
                for c in unique_utilities_df:
                    assert not transit_df[c].isnull().any()

            if len(unique_transit_df) > 0:
                # if all rows were cached, then unique_utilities_df is just a ref to cache
                del unique_utilities_df
                chunk.log_df(trace_label, "unique_utilities_df", None)

            chunk.log_df(trace_label, "transit_df", None)

            if trace:
                self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
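
# Sketch (assumed, not ActivitySim's compute_utilities) of the dedupe/redupe pattern used above:
# evaluate an expensive function once per unique 'uid' and merge the result back onto the full
# table so duplicated rows share the computed value. 'expensive_utility' is hypothetical.
import pandas as pd

def expensive_utility(df):
    return df['ivt'] * -0.025 + df['fare'] * -0.005

transit_df = pd.DataFrame({
    'uid':  [10, 10, 11, 10],
    'ivt':  [30, 30, 45, 30],
    'fare': [2.5, 2.5, 3.0, 2.5],
})
unique_df = transit_df.drop_duplicates('uid').set_index('uid')
unique_df['utility'] = expensive_utility(unique_df)   # computed once per unique uid
transit_df = transit_df[['uid']].merge(unique_df[['utility']], left_on='uid', right_index=True)
print(transit_df)   # every row with the same uid carries the shared utility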
Example #22
def run_cdap(persons,
             person_type_map,
             cdap_indiv_spec,
             cdap_interaction_coefficients,
             cdap_fixed_relative_proportions,
             locals_d,
             chunk_size=0,
             trace_hh_id=None,
             trace_label=None):
    """
    Choose individual activity patterns for persons.

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data. Must contain at least a household ID, household size,
        person type category, and age, plus any columns used in cdap_indiv_spec
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec for individuals without taking any interactions into account.
    cdap_interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    cdap_fixed_relative_proportions : pandas.DataFrame
        Spec for the relative proportions of each activity (M, N, H)
        to choose activities for additional household members not handled by CDAP
    locals_d : Dict
        This is a dictionary of local variables that will be the environment
        for an evaluation of an expression that begins with @
        in either the cdap_indiv_spec or cdap_fixed_relative_proportions expression files
    chunk_size: int
        Chunk size or 0 for no chunking
    trace_hh_id : int
        hh_id to trace or None if no hh tracing
    trace_label : str
        label for tracing or None if no tracing

    Returns
    -------
    choices : pandas.Series

        series of cdap_activity choices indexed on _persons_index_

        cdap_activity : str
            activity for that person expressed as 'M', 'N', 'H'
    """

    trace_label = tracing.extend_trace_label(trace_label, 'cdap')

    result_list = []
    # segment by person type and pick the right spec for each person type
    for i, persons_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers_by_chunk_id(persons, chunk_size, trace_label):

        cdap_results = \
            _run_cdap(persons_chunk,
                      person_type_map,
                      cdap_indiv_spec,
                      cdap_interaction_coefficients,
                      cdap_fixed_relative_proportions,
                      locals_d,
                      trace_hh_id, chunk_trace_label)

        result_list.append(cdap_results)

        chunk.log_df(trace_label, 'result_list', result_list)

    # FIXME: this will require 2X RAM
    # if necessary, could append to hdf5 store on disk:
    # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
    if len(result_list) > 1:
        cdap_results = pd.concat(result_list)

    if trace_hh_id:

        tracing.trace_df(cdap_results,
                         label="cdap",
                         columns=['cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)

    # return choices column as series
    return cdap_results['cdap_activity']
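
# Minimal sketch of the idea behind adaptive_chunked_choosers_by_chunk_id used above (the real
# API is assumed, not reproduced): persons are chunked so that a household is never split across
# chunks, then per-chunk results are concatenated. All names below are hypothetical.
import pandas as pd

def chunk_by_household(persons, target_rows):
    """Yield person chunks, keeping each household whole."""
    batch, count = [], 0
    for hh_id, size in persons.groupby('household_id').size().items():
        batch.append(hh_id)
        count += size
        if count >= target_rows:
            yield persons[persons.household_id.isin(batch)]
            batch, count = [], 0
    if batch:
        yield persons[persons.household_id.isin(batch)]

persons = pd.DataFrame({'household_id': [1, 1, 2, 3, 3, 3], 'age': [40, 8, 30, 65, 64, 30]})
chunks = list(chunk_by_household(persons, target_rows=3))
print([len(c) for c in chunks])                        # e.g. [3, 3] -- households never split
print(pd.concat(chunks).equals(persons.sort_index()))  # True: concatenating chunks recovers the table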
Example #23
def eval_and_sum(assignment_expressions,
                 df,
                 locals_dict,
                 group_by_column_names=None,
                 df_alias=None,
                 chunk_size=0,
                 trace_rows=None):
    """
    Evaluate assignment_expressions against df, and sum the results
    (sum by group if a list of group_by_column_names is specified,
    e.g. group by coc column names and return sums grouped by community of concern).

    Parameters
    ----------
    assignment_expressions
    df
    locals_dict
    group_by_column_names : array of str
        list of names of the columns to group by (e.g. coc_column_names of trip_coc_end)
    df_alias : str
        assign_variables df_alias (name of df in assignment_expressions)
    chunk_size : int
    trace_rows : array of bool
        array indicating which rows in df are to be traced

    Returns
    -------

    """

    if group_by_column_names is None:
        group_by_column_names = []

    rows_per_chunk, effective_chunk_size = \
        calc_rows_per_chunk(chunk_size, df, assignment_expressions,
                            extra_columns=len(group_by_column_names),
                            trace_label='eval_and_sum')

    logger.info("eval_and_sum chunk_size %s rows_per_chunk %s df rows %s" %
                (effective_chunk_size, rows_per_chunk, df.shape[0]))

    summary = None
    result_list = []
    trace_results = []
    trace_assigned_locals = {}

    for i, num_chunks, df_chunk, trace_rows_chunk in chunked_df(
            df, rows_per_chunk, trace_rows):

        logger.info("eval_and_sum chunk %s of %s" % (i, num_chunks))

        logger.debug("eval_and_sum chunk %s assign variables" % (i, ))
        assigned_chunk, trace_chunk, trace_assigned_locals_chunk = \
            assign.assign_variables(assignment_expressions,
                                    df_chunk,
                                    locals_dict=locals_dict,
                                    df_alias=df_alias,
                                    trace_rows=trace_rows_chunk)

        # sum this chunk
        logger.debug("eval_and_sum chunk %s sum" % (i, ))
        if group_by_column_names:
            # concat in the group_by columns
            for c in group_by_column_names:
                assigned_chunk[c] = df_chunk[c]
            # sum this chunk
            summary = assigned_chunk.groupby(group_by_column_names).sum()
        else:
            summary = assigned_chunk.sum().to_frame().T

        result_list.append(summary)

        if trace_chunk is not None:
            trace_results.append(trace_chunk)

        if trace_assigned_locals_chunk is not None:
            trace_assigned_locals.update(trace_assigned_locals_chunk)

        # note: chunk size will log low if there are more spec temp vars than extra_columns
        trace_label = 'eval_and_sum chunk_%s' % i
        chunk.log_open(trace_label, chunk_size, effective_chunk_size)
        chunk.log_df(trace_label, 'df_chunk', df_chunk)
        chunk.log_df(trace_label, 'assigned_chunk', assigned_chunk)
        chunk.log_close(trace_label)

    assert result_list

    # squash multiple chunk summaries
    if len(result_list) > 1:
        logger.debug("eval_and_sum squash chunk summaries")

        summary = pd.concat(result_list)

        if group_by_column_names:
            summary.reset_index(inplace=True)
            summary = summary.groupby(group_by_column_names).sum()
        else:
            summary = summary.sum().to_frame().T

    if trace_results:
        trace_results = pd.concat(trace_results)
        # trace_rows index values should match index of original df
        trace_results.index = df[trace_rows].index
    else:
        trace_results = None

    return summary, trace_results, trace_assigned_locals
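
# Self-contained sketch of the "squash chunk summaries" step above: per-chunk group-by sums are
# concatenated and re-grouped, which recovers the same totals as a single full-table group-by.
# The column names ('coc', 'benefit') are made up for illustration.
import pandas as pd

df = pd.DataFrame({'coc': ['a', 'a', 'b', 'b', 'a'], 'benefit': [1.0, 2.0, 3.0, 4.0, 5.0]})
chunks = [df.iloc[:3], df.iloc[3:]]

partial_summaries = [c.groupby('coc').sum() for c in chunks]            # one summary per chunk
squashed = pd.concat(partial_summaries).reset_index().groupby('coc').sum()

assert squashed.equals(df.groupby('coc').sum())                         # same totals either way
print(squashed)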
Example #24
    def build_virtual_path(self,
                           recipe,
                           path_type,
                           orig,
                           dest,
                           tod,
                           demographic_segment,
                           want_choices,
                           trace_label,
                           filter_targets=None,
                           trace=False,
                           override_choices=None):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'build_virtual_path')

        # Tracing is implemented as a separate, second call that operates ONLY on filter_targets
        assert not (trace and filter_targets is None)
        if filter_targets is not None:
            assert filter_targets.any()

            # slice orig and dest
            orig = orig[filter_targets]
            dest = dest[filter_targets]
            assert len(orig) > 0
            assert len(dest) > 0

            # slice tod and demographic_segment if not scalar
            if not isinstance(tod, str):
                tod = tod[filter_targets]
            if demographic_segment is not None:
                demographic_segment = demographic_segment[filter_targets]
                assert len(demographic_segment) > 0

            # slice choices
            # (requires actual choices from the previous call lest rands change on second call)
            assert want_choices == (override_choices is not None)
            if want_choices:
                override_choices = override_choices[filter_targets]

        units = self.units_for_recipe(recipe)
        assert units == 'utility' or not want_choices, "'want_choices' only supported if units is 'utility'"

        access_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.access')
        egress_mode = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.egress')
        path_types_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
        attributes_as_columns = \
            self.network_los.setting(f'TVPB_SETTINGS.{recipe}.tap_tap_settings.attributes_as_columns', [])

        path_info = {
            'path_type': path_type,
            'access_mode': access_mode,
            'egress_mode': egress_mode
        }

        # maz od pairs requested
        with memo("#TVPB build_virtual_path maz_od_df"):
            maz_od_df = pd.DataFrame({
                'idx': orig.index.values,
                'omaz': orig.values,
                'dmaz': dest.values,
                'seq': range(len(orig))
            })
            chunk.log_df(trace_label, "maz_od_df", maz_od_df)
            self.trace_maz_tap(maz_od_df, access_mode, egress_mode)

        # for location choice, there will be multiple alt dest rows per chooser and duplicate orig.index values
        # but tod and demographic_segment should be the same for all chooser rows (unique orig index values)
        # knowing this allows us to eliminate redundant computations (e.g. utilities of maz_tap pairs)
        duplicated = orig.index.duplicated(keep='first')
        chooser_attributes = pd.DataFrame(index=orig.index[~duplicated])
        if not isinstance(tod, str):
            chooser_attributes['tod'] = tod.loc[~duplicated]
        elif 'tod' in attributes_as_columns:
            chooser_attributes['tod'] = tod
        else:
            path_info['tod'] = tod
        if demographic_segment is not None:
            chooser_attributes[
                'demographic_segment'] = demographic_segment.loc[~duplicated]

        with memo("#TVPB build_virtual_path access_df"):
            access_df = self.compute_maz_tap_utilities(recipe,
                                                       maz_od_df,
                                                       chooser_attributes,
                                                       leg='access',
                                                       mode=access_mode,
                                                       trace_label=trace_label,
                                                       trace=trace)
        chunk.log_df(trace_label, "access_df", access_df)

        with memo("#TVPB build_virtual_path egress_df"):
            egress_df = self.compute_maz_tap_utilities(recipe,
                                                       maz_od_df,
                                                       chooser_attributes,
                                                       leg='egress',
                                                       mode=egress_mode,
                                                       trace_label=trace_label,
                                                       trace=trace)
        chunk.log_df(trace_label, "egress_df", egress_df)

        # path_info for use by expressions (e.g. penalty for drive access if no parking at access tap)
        with memo("#TVPB build_virtual_path compute_tap_tap"):
            transit_df = self.compute_tap_tap(recipe,
                                              maz_od_df,
                                              access_df,
                                              egress_df,
                                              chooser_attributes,
                                              path_info=path_info,
                                              trace_label=trace_label,
                                              trace=trace)
        chunk.log_df(trace_label, "transit_df", transit_df)

        with memo("#TVPB build_virtual_path best_paths"):
            path_df = self.best_paths(recipe, path_type, maz_od_df, access_df,
                                      egress_df, transit_df, trace_label,
                                      trace)
        chunk.log_df(trace_label, "path_df", path_df)

        # now that we have created path_df, we are done with the dataframes for the separate legs
        del access_df
        chunk.log_df(trace_label, "access_df", None)
        del egress_df
        chunk.log_df(trace_label, "egress_df", None)
        del transit_df
        chunk.log_df(trace_label, "transit_df", None)

        if units == 'utility':

            # logsums
            with memo("#TVPB build_virtual_path logsums"):
                # one row per seq with utilities in columns
                # path_num 0-based to align with logit.make_choices 0-based choice indexes
                path_df['path_num'] = path_df.groupby('seq').cumcount()
                chunk.log_df(trace_label, "path_df", path_df)

                utilities_df = path_df[['seq', 'path_num',
                                        units]].set_index(['seq', 'path_num'
                                                           ]).unstack()
                utilities_df.columns = utilities_df.columns.droplevel(
                )  # for legibility

                # add rows missing because no access or egress availability
                utilities_df = pd.concat(
                    [pd.DataFrame(index=maz_od_df.seq), utilities_df], axis=1)
                utilities_df = utilities_df.fillna(
                    UNAVAILABLE
                )  # set utilities for missing paths to UNAVAILABLE

                chunk.log_df(trace_label, "utilities_df", utilities_df)

                with warnings.catch_warnings(record=True) as w:
                    # Cause all warnings to always be triggered.
                    # most likely "divide by zero encountered in log" caused by all transit sets non-viable
                    warnings.simplefilter("always")

                    paths_nest_nesting_coefficient = path_types_settings.get(
                        'paths_nest_nesting_coefficient', 1)
                    exp_utilities = np.exp(utilities_df.values /
                                           paths_nest_nesting_coefficient)
                    logsums = np.maximum(
                        np.log(np.nansum(exp_utilities, axis=1)), UNAVAILABLE)

                    if len(w) > 0:
                        for wrn in w:
                            logger.warning(
                                f"{trace_label} - {type(wrn).__name__} ({wrn.message})"
                            )

                        DUMP = False
                        if DUMP:
                            zero_utilities_df = utilities_df[np.nansum(
                                np.exp(utilities_df.values), axis=1) == 0]
                            zero_utilities_df.to_csv(config.output_file_path(
                                'warning_utilities_df.csv'),
                                                     index=True)
                            bug  # noqa: F821 -- deliberately undefined name, halts execution after dumping the csv

            if want_choices:

                # orig index to identify appropriate random number channel to use making choices
                utilities_df.index = orig.index

                with memo("#TVPB build_virtual_path make_choices"):

                    probs = logit.utils_to_probs(utilities_df,
                                                 allow_zero_probs=True,
                                                 trace_label=trace_label)
                    chunk.log_df(trace_label, "probs", probs)

                    if trace:
                        choices = override_choices

                        utilities_df['choices'] = choices
                        self.trace_df(utilities_df, trace_label,
                                      'utilities_df')

                        probs['choices'] = choices
                        self.trace_df(probs, trace_label, 'probs')
                    else:

                        choices, rands = logit.make_choices(
                            probs,
                            allow_bad_probs=True,
                            trace_label=trace_label)

                        chunk.log_df(trace_label, "rands", rands)
                        del rands
                        chunk.log_df(trace_label, "rands", None)

                    del probs
                    chunk.log_df(trace_label, "probs", None)

                # we need to get path_set, btap, atap from path_df row with same seq and path_num
                # drop seq join column, but keep path_num of choice to override_choices when tracing
                columns_to_cache = ['btap', 'atap', 'path_set', 'path_num']
                logsum_df = \
                    pd.merge(pd.DataFrame({'seq': range(len(orig)), 'path_num': choices.values}),
                             path_df[['seq'] + columns_to_cache],
                             on=['seq', 'path_num'], how='left')\
                    .drop(columns=['seq'])\
                    .set_index(orig.index)

                logsum_df['logsum'] = logsums

            else:

                assert len(logsums) == len(orig)
                logsum_df = pd.DataFrame({'logsum': logsums}, index=orig.index)

            chunk.log_df(trace_label, "logsum_df", logsum_df)

            del utilities_df
            chunk.log_df(trace_label, "utilities_df", None)

            if trace:
                self.trace_df(logsum_df, trace_label, 'logsum_df')

            chunk.log_df(trace_label, "logsum_df", logsum_df)
            results = logsum_df

        else:
            assert units == 'time'

            # return a series
            results = pd.Series(path_df[units].values, index=path_df['idx'])

            # zero-fill rows for O-D pairs where no best path exists because there was no tap-tap transit availability
            results = reindex(results, maz_od_df.idx).fillna(0.0)

            chunk.log_df(trace_label, "results", results)

        assert len(results) == len(orig)

        del path_df
        chunk.log_df(trace_label, "path_df", None)

        # diagnostic
        # maz_od_df['DIST'] = self.network_los.get_default_skim_dict().get('DIST').get(maz_od_df.omaz, maz_od_df.dmaz)
        # maz_od_df[units] = results.logsum if units == 'utility' else results.values
        # print(f"maz_od_df\n{maz_od_df}")

        return results
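
# Numeric sketch (assumed constants) of the logsum step above: per-path utilities for each chooser
# are scaled by a nesting coefficient, exponentiated, summed and logged, and choosers with no
# viable path are pinned to a large negative UNAVAILABLE value.
import numpy as np

UNAVAILABLE = -999.0
paths_nest_nesting_coefficient = 0.72   # hypothetical value

utilities = np.array([
    [-2.0, -2.5, UNAVAILABLE],                 # chooser with two viable paths
    [UNAVAILABLE, UNAVAILABLE, UNAVAILABLE],   # chooser with no viable path at all
])

with np.errstate(divide='ignore'):      # log(0) -> -inf for the all-unavailable row
    exp_utilities = np.exp(utilities / paths_nest_nesting_coefficient)
    logsums = np.maximum(np.log(np.nansum(exp_utilities, axis=1)), UNAVAILABLE)

print(logsums)   # roughly [-2.37, -999.]: the second chooser is flagged unavailable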
Example #25
def physical_activity_processor(trips_with_demographics, persons_merged,
                                physical_activity_trip_spec,
                                physical_activity_person_spec,
                                physical_activity_settings, coc_column_names,
                                settings, chunk_size, trace_hh_id):
    """
    Compute physical activity benefits

    Physical activity benefits generally accrue if the net physical activity for an individual
    exceeds a certain threshold. We calculate individual physical activity based on trips,
    so we need to compute trip activity and then sum up to the person level to calculate benefits.
    We chunk trips by household id to ensure that all of a person's trips are in the same chunk.
    """

    trips_df = trips_with_demographics.to_frame()
    persons_df = persons_merged.to_frame()
    trace_label = 'physical_activity'

    logger.info(
        "Running physical_activity_processor with %d trips for %d persons " %
        (len(trips_df), len(persons_df)))

    locals_dict = config.get_model_constants(physical_activity_settings)
    locals_dict.update(config.setting('globals'))

    trip_trace_rows = trace_hh_id and trips_df.household_id == trace_hh_id

    rows_per_chunk, effective_chunk_size = \
        physical_activity_rpc(chunk_size, trips_df, persons_df,
                              physical_activity_trip_spec, trace_label)

    logger.info("physical_activity_processor chunk_size %s rows_per_chunk %s" %
                (chunk_size, rows_per_chunk))

    coc_summary = None
    result_list = []

    # iterate over trips df chunked by hh_id
    for i, num_chunks, trips_chunk, trace_rows_chunk \
            in bca.chunked_df_by_chunk_id(trips_df, trip_trace_rows, rows_per_chunk):

        logger.info("%s chunk %s of %s" % (trace_label, i, num_chunks))

        trip_activity, trip_trace_results, trip_trace_assigned_locals = \
            assign.assign_variables(physical_activity_trip_spec,
                                    trips_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='trips',
                                    trace_rows=trace_rows_chunk)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks
        if trip_trace_results is not None:
            tracing.write_csv(trip_trace_results,
                              file_name="physical_activity_trips",
                              index_label='trip_id',
                              column_labels=['label', 'trip'])

            if trip_trace_assigned_locals:
                tracing.write_csv(trip_trace_assigned_locals,
                                  file_name="physical_activity_trips_locals")

        # sum trip activity for each unique person
        trip_activity = trip_activity.groupby(trips_chunk.person_id).sum()

        # merge in persons columns for this chunk
        persons_chunk = pd.merge(trip_activity,
                                 persons_df,
                                 left_index=True,
                                 right_index=True)

        # trace rows array for this chunk
        person_trace_rows = trace_hh_id and persons_chunk[
            'household_id'] == trace_hh_id

        person_activity, person_trace_results, person_trace_assigned_locals = \
            assign.assign_variables(physical_activity_person_spec,
                                    persons_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='persons',
                                    trace_rows=person_trace_rows)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks
        if person_trace_results is not None:
            tracing.write_csv(person_trace_results,
                              file_name="physical_activity_persons",
                              index_label='persons_merged_table_index',
                              column_labels=['label', 'person'])

            if person_trace_assigned_locals:
                tracing.write_csv(person_trace_assigned_locals,
                                  file_name="physical_activity_persons_locals")

        # concat in the coc columns and summarize the chunk by coc
        person_activity = pd.concat(
            [persons_chunk[coc_column_names], person_activity], axis=1)
        coc_summary = person_activity.groupby(coc_column_names).sum()

        result_list.append(coc_summary)

        chunk_trace_label = '%s chunk_%s' % (trace_label, i)
        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)
        chunk.log_df(chunk_trace_label, 'trips_chunk', trips_chunk)
        chunk.log_df(chunk_trace_label, 'persons_chunk', persons_chunk)
        chunk.log_close(chunk_trace_label)

    if len(result_list) > 1:

        # (if there was only one chunk, then concat is redundant)
        coc_summary = pd.concat(result_list)

        # squash the accumulated chunk summaries by reapplying group and sum
        coc_summary.reset_index(inplace=True)
        coc_summary = coc_summary.groupby(coc_column_names).sum()

    result_prefix = 'PA_'
    add_result_columns("coc_results", coc_summary, result_prefix)
    add_summary_results(coc_summary,
                        prefix=result_prefix,
                        spec=physical_activity_person_spec)
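
# Sketch (hypothetical spec values, not the assign_variables machinery) of the trip -> person
# aggregation described above: per-trip activity is summed by person, and a benefit accrues only
# for persons whose total exceeds an assumed threshold.
import pandas as pd

trips = pd.DataFrame({
    'person_id':    [1, 1, 2, 3, 3],
    'walk_minutes': [10, 25, 5, 0, 40],
})
ACTIVITY_THRESHOLD = 30   # assumed constant; the real threshold comes from the settings/spec files

person_activity = trips.groupby('person_id')['walk_minutes'].sum()
meets_threshold = person_activity >= ACTIVITY_THRESHOLD

print(person_activity.to_dict())    # {1: 35, 2: 5, 3: 40}
print(int(meets_threshold.sum()))   # 2 persons would accrue a physical activity benefit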
Example #26
def _schedule_tours(tours, persons_merged, alts, spec, logsum_tour_purpose,
                    model_settings, skims, timetable, window_id_col,
                    previous_tour, tour_owner_id_col, estimator,
                    tour_trace_label):
    """
    previous_tour stores values used to add columns that can be used in the spec
    which have to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule with unique timetable window_id_col
    persons_merged : DataFrame
        DataFrame of persons to be merged with tours containing attributes referenced
        by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent all possible time slots.
        tdd_interaction_dataset function will use timetable to filter them to omit
        unavailable alternatives
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
    model_settings : dict
    timetable : TimeTable
        timetable of time windows for person (or subtour) with rows for tours[window_id_col]
    window_id_col : str
        column name from tours that identifies timetable owner (or None if tours index)
        - person_id for non/mandatory tours
        - parent_tour_id for subtours,
        - None (tours index) for joint_tours since every tour may have different participants)
    previous_tour: Series
        series with the tdd_alt choice of the last previously scheduled tour for each tour owner
    tour_owner_id_col : str
        column name from tours that identifies 'owner' of this tour
        (person_id for non/mandatory tours, parent_tour_id for subtours,
        household_id for joint_tours)
    tour_trace_label

    Returns
    -------

    """

    logger.info("%s schedule_tours running %d tour choices" %
                (tour_trace_label, len(tours)))

    # merge persons into tours
    # avoid dual suffix for redundant columns names (e.g. household_id) that appear in both
    tours = pd.merge(tours,
                     persons_merged,
                     left_on='person_id',
                     right_index=True,
                     suffixes=('', '_y'))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - add explicit window_id_col for timetable owner if it is index
    # if no timetable window_id_col specified, then add index as an explicit column
    # (this is not strictly necessary but its presence makes code simpler in several places)
    if window_id_col is None:
        window_id_col = tours.index.name
        tours[window_id_col] = tours.index

    # timetable can't handle multiple tours per window_id
    assert not tours[window_id_col].duplicated().any()

    # - build interaction dataset filtered to include only available tdd alts
    # dataframe columns start, end, duration, person_id, tdd
    # indexed (not unique) on tour_id
    choice_column = TDD_CHOICE_COLUMN
    alt_tdd = tdd_interaction_dataset(tours, alts, timetable, choice_column,
                                      window_id_col, tour_trace_label)
    # print(f"tours {tours.shape} alts {alts.shape}")
    chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    # - add logsums
    if logsum_tour_purpose:
        logsums = \
            compute_logsums(alt_tdd, tours, logsum_tour_purpose, model_settings, skims, tour_trace_label)
    else:
        logsums = 0
    alt_tdd['mode_choice_logsum'] = logsums

    # - merge in previous tour columns
    # adds start_previous and end_previous, joins on index
    tours = \
        tours.join(get_previous_tour_by_tourid(tours[tour_owner_id_col], previous_tour, alts))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - make choices
    locals_d = {'tt': timetable}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    if not RUN_ALTS_PREPROCESSOR_BEFORE_MERGE:
        # Note: Clint was running alts_preprocessor here on tdd_interaction_dataset instead of on raw (unmerged) alts
        # and he was using logsum_tour_purpose as selector, although logically it should be the spec_segment
        # It just happened to work for example_arc.mandatory_tour_scheduling because, in that model, (unlike semcog)
        # logsum_tour_purpose and spec_segments are aligned (both logsums and spec are segmented on work, school, univ)
        # In any case, I don't see any benefit to doing this here - at least not for any existing implementations
        # but if we do, it will require passing spec_segment to schedule_tours  and _schedule_tours
        # or redundantly segmenting alts (yuck!) to conform to more granular tour_segmentation (e.g. univ do school)
        spec_segment = logsum_tour_purpose  # FIXME this is not always right - see note above
        alt_tdd = run_alts_preprocessor(model_settings, alt_tdd, spec_segment,
                                        locals_d, tour_trace_label)
        chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(tours)
        estimator.set_alt_id(choice_column)
        estimator.write_interaction_sample_alternatives(alt_tdd)

    choices = interaction_sample_simulate(tours,
                                          alt_tdd,
                                          spec,
                                          choice_column=choice_column,
                                          locals_d=locals_d,
                                          chunk_size=0,
                                          trace_label=tour_trace_label,
                                          estimator=estimator)

    # - update previous_tour and timetable parameters

    # update previous_tour (series with most recent previous tdd choices) with latest values
    previous_tour.loc[tours[tour_owner_id_col]] = choices.values

    # update timetable with chosen tdd footprints
    timetable.assign(tours[window_id_col], choices)

    return choices
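
# Simplified sketch (assumed behavior of get_previous_tour_by_tourid) of the previous_tour
# mechanism described in the docstring: the alternative chosen for a person's most recent tour
# is re-joined onto that person's next tour with a '_previous' suffix.
import pandas as pd

alts = pd.DataFrame({'start': [7, 9, 17], 'end': [9, 17, 22]})   # tdd alternatives (rows 0..2)
previous_tour = pd.Series({100: 0, 200: 2})                      # person_id -> last chosen alt

def previous_tour_columns(tour_person_ids, previous_tour, alts):
    prev = alts.loc[previous_tour.loc[tour_person_ids].values]
    prev.columns = [c + '_previous' for c in prev.columns]
    prev.index = tour_person_ids.index
    return prev

tours = pd.DataFrame({'person_id': [100, 200]}, index=pd.Index([1, 2], name='tour_id'))
tours = tours.join(previous_tour_columns(tours.person_id, previous_tour, alts))
print(tours)   # start_previous/end_previous now available to spec expressions (guard with tour_num >= 2)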
Example #27
def schedule_tours(tours, persons_merged, alts, spec, logsum_tour_purpose,
                   model_settings, timetable, timetable_window_id_col,
                   previous_tour, tour_owner_id_col, estimator, chunk_size,
                   tour_trace_label, tour_chunk_tag):
    """
    chunking wrapper for _schedule_tours

    While interaction_sample_simulate provides chunking support, the merged tours, persons
    dataframe and the tdd_interaction_dataset are very big, so we want to create them inside
    the chunking loop to minimize memory footprint. So we implement the chunking loop here,
    and pass a chunk_size of 0 to interaction_sample_simulate to disable its chunking support.

    """

    if not tours.index.is_monotonic_increasing:
        logger.info(
            "schedule_tours %s tours not monotonic_increasing - sorting df" % tour_trace_label)
        tours = tours.sort_index()

    logger.info("%s schedule_tours running %d tour choices" %
                (tour_trace_label, len(tours)))

    # no more than one tour per timetable_window per call
    if timetable_window_id_col is None:
        assert not tours.index.duplicated().any()
    else:
        assert not tours[timetable_window_id_col].duplicated().any()

    if 'LOGSUM_SETTINGS' in model_settings:
        # we need skims to calculate tvpb skim overhead in 3_ZONE systems for use by calc_rows_per_chunk
        skims = skims_for_logsums(logsum_tour_purpose, model_settings,
                                  tour_trace_label)
    else:
        skims = None

    result_list = []
    for i, chooser_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers(tours, chunk_size, tour_trace_label, tour_chunk_tag):

        choices = _schedule_tours(chooser_chunk,
                                  persons_merged,
                                  alts,
                                  spec,
                                  logsum_tour_purpose,
                                  model_settings,
                                  skims,
                                  timetable,
                                  timetable_window_id_col,
                                  previous_tour,
                                  tour_owner_id_col,
                                  estimator,
                                  tour_trace_label=chunk_trace_label)

        result_list.append(choices)

        chunk.log_df(tour_trace_label, 'result_list', result_list)

    # FIXME: this will require 2X RAM
    # if necessary, could append to hdf5 store on disk:
    # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
    if len(result_list) > 1:
        choices = pd.concat(result_list)

    assert len(choices.index) == len(tours.index)

    return choices
Example #28
def schedule_nth_trips(trips, probs_spec, model_settings, first_trip_in_leg,
                       report_failed_trips, trace_hh_id, trace_label):
    """
    We join each trip with the appropriate row in probs_spec by joining on probs_join_cols,
    which should exist in both the trips and probs_spec dataframes.

    Parameters
    ----------
    trips: pd.DataFrame
    probs_spec: pd.DataFrame
        Dataframe of probs for choice of depart times and join columns to match them with trips.
        Depart columns names are irrelevant. Instead, they are position dependent,
        time period choice is their index + depart_alt_base
    model_settings: dict
        DEPART_ALT_BASE is the int added to the probs column index to get the time period it
        represents, e.g. DEPART_ALT_BASE = 5 means the first column (column 0) represents 5 am
    report_failed_trips : bool
    trace_hh_id
    trace_label

    Returns
    -------
    choices: pd.Series
        time periods depart choices, one per trip (except for trips with zero probs)
    """

    depart_alt_base = model_settings.get('DEPART_ALT_BASE')

    probs_cols = [c for c in probs_spec.columns if c not in PROBS_JOIN_COLUMNS]

    # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
    choosers = pd.merge(trips.reset_index(),
                        probs_spec,
                        on=PROBS_JOIN_COLUMNS,
                        how='left').set_index('trip_id')
    chunk.log_df(trace_label, "choosers", choosers)

    if trace_hh_id and tracing.has_trace_targets(trips):
        tracing.trace_df(choosers, '%s.choosers' % trace_label)

    # choosers should now match trips row for row
    assert choosers.index.is_unique
    assert len(choosers.index) == len(trips.index)

    # zero out probs outside earliest-latest window
    chooser_probs = clip_probs(trips, choosers[probs_cols], model_settings)
    chunk.log_df(trace_label, "chooser_probs", chooser_probs)

    if first_trip_in_leg:
        # probs should sum to 1 unless all zero
        chooser_probs = chooser_probs.div(chooser_probs.sum(axis=1),
                                          axis=0).fillna(0)

    # probs should sum to 1 with residual probs resulting in choice of 'fail'
    chooser_probs['fail'] = 1 - chooser_probs.sum(axis=1).clip(0, 1)
    chunk.log_df(trace_label, "chooser_probs", chooser_probs)

    if trace_hh_id and tracing.has_trace_targets(trips):
        tracing.trace_df(chooser_probs, '%s.chooser_probs' % trace_label)

    choices, rands = logit.make_choices(chooser_probs,
                                        trace_label=trace_label,
                                        trace_choosers=choosers)

    chunk.log_df(trace_label, "choices", choices)
    chunk.log_df(trace_label, "rands", rands)

    if trace_hh_id and tracing.has_trace_targets(trips):
        tracing.trace_df(choices,
                         '%s.choices' % trace_label,
                         columns=[None, 'depart'])
        tracing.trace_df(rands,
                         '%s.rands' % trace_label,
                         columns=[None, 'rand'])

    # convert alt choice index to depart time (setting failed choices to -1)
    failed = (choices == chooser_probs.columns.get_loc('fail'))
    choices = (choices + depart_alt_base).where(~failed, -1)

    chunk.log_df(trace_label, "failed", failed)

    # report failed trips while we have the best diagnostic info
    if report_failed_trips and failed.any():
        report_bad_choices(bad_row_map=failed,
                           df=choosers,
                           filename='failed_choosers',
                           trace_label=trace_label,
                           trace_choosers=None)

    # trace before removing failures
    if trace_hh_id and tracing.has_trace_targets(trips):
        tracing.trace_df(choices,
                         '%s.choices' % trace_label,
                         columns=[None, 'depart'])
        tracing.trace_df(rands,
                         '%s.rands' % trace_label,
                         columns=[None, 'rand'])

    # remove any failed choices
    if failed.any():
        choices = choices[~failed]

    assert (choices >= trips.earliest[~failed]).all()
    assert (choices <= trips.latest[~failed]).all()

    return choices
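
# Minimal sketch (made-up numbers) of the failure handling above: a residual 'fail' column
# absorbs the probability mass removed by clipping, and a chosen column index is converted to a
# depart hour by adding DEPART_ALT_BASE, with failed choices marked as -1.
import pandas as pd

depart_alt_base = 5                       # column 0 represents the 5 am period
chooser_probs = pd.DataFrame(
    [[0.2, 0.3, 0.1],                     # probs sum to 0.6 after clipping to the allowed window
     [0.0, 0.0, 0.0]],                    # all probs clipped away -> guaranteed failure
    index=pd.Index([101, 102], name='trip_id'))

chooser_probs['fail'] = 1 - chooser_probs.sum(axis=1).clip(0, 1)

# suppose logit.make_choices sampled column 1 for trip 101 and the 'fail' column for trip 102
choices = pd.Series([1, chooser_probs.columns.get_loc('fail')], index=chooser_probs.index)
failed = choices == chooser_probs.columns.get_loc('fail')
departs = (choices + depart_alt_base).where(~failed, -1)
print(departs.to_dict())   # {101: 6, 102: -1}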
Example #29
def _schedule_tours(
        tours, persons_merged, alts,
        spec, logsum_tour_purpose,
        model_settings,
        timetable, window_id_col,
        previous_tour, tour_owner_id_col,
        tour_trace_label):
    """
    previous_tour stores values used to add columns that can be used in the spec
    which have to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule with unique timetable window_id_col
    persons_merged : DataFrame
        DataFrame of persons to be merged with tours containing attributes referenced
        by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent all possible time slots.
        tdd_interaction_dataset function will use timetable to filter them to omit
        unavailable alternatives
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
    model_settings : dict
    timetable : TimeTable
        timetable of time windows for person (or subtour) with rows for tours[window_id_col]
    window_id_col : str
        column name from tours that identifies timetable owner (or None if tours index)
        - person_id for non/mandatory tours
        - parent_tour_id for subtours,
        - None (tours index) for joint_tours since every tour may have different participants)
    previous_tour: Series
        series with the tdd_alt choice of the last previously scheduled tour for each tour owner
    tour_owner_id_col : str
        column name from tours that identifies 'owner' of this tour
        (person_id for non/mandatory tours, parent_tour_id for subtours,
        household_id for joint_tours)
    tour_trace_label

    Returns
    -------

    """

    logger.info("%s schedule_tours running %d tour choices" % (tour_trace_label, len(tours)))

    # merge persons into tours
    # avoid dual suffix for redundant columns names (e.g. household_id) that appear in both
    tours = pd.merge(tours, persons_merged, left_on='person_id', right_index=True,
                     suffixes=('', '_y'))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - add explicit window_id_col for timetable owner if it is index
    # if no timetable window_id_col specified, then add index as an explicit column
    # (this is not strictly necessary but its presence makes code simpler in several places)
    if window_id_col is None:
        window_id_col = tours.index.name
        tours[window_id_col] = tours.index

    # timetable can't handle multiple tours per window_id
    assert not tours[window_id_col].duplicated().any()

    # - build interaction dataset filtered to include only available tdd alts
    # dataframe columns start, end, duration, person_id, tdd
    # indexed (not unique) on tour_id
    choice_column = 'tdd'
    alt_tdd = tdd_interaction_dataset(tours, alts, timetable, choice_column, window_id_col,
                                      tour_trace_label)
    chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    # - add logsums
    if logsum_tour_purpose:
        logsums = \
            compute_logsums(alt_tdd, tours, logsum_tour_purpose, model_settings, tour_trace_label)
    else:
        logsums = 0
    alt_tdd['mode_choice_logsum'] = logsums

    # - merge in previous tour columns
    # adds start_previous and end_previous, joins on index
    tours = \
        tours.join(get_previous_tour_by_tourid(tours[tour_owner_id_col], previous_tour, alts))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - make choices
    locals_d = {
        'tt': timetable
    }
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample_simulate(
        tours,
        alt_tdd,
        spec,
        choice_column=choice_column,
        locals_d=locals_d,
        chunk_size=0,
        trace_label=tour_trace_label
    )

    # - update previous_tour and timetable parameters

    # update previous_tour (series with most recent previous tdd choices) with latest values
    previous_tour.loc[tours[tour_owner_id_col]] = choices.values

    # update timetable with chosen tdd footprints
    timetable.assign(tours[window_id_col], choices)

    return choices
Example #30
    def lookup_tap_tap_utilities(self, recipe, maz_od_df, access_df, egress_df,
                                 chooser_attributes, path_info, trace_label):
        """
        create transit_df and compute utilities for all atap-btap pairs between the omaz values in access_df
        and the dmaz values in egress_df
        look up the utilities in the precomputed tap_cache data (which is indexed by uid_calculator unique_ids)
        (unique_id can be used as a zero-based index into the data array)

        transit_df contains all possible access omaz/btap to egress dmaz/atap transit path pairs for each chooser

        Parameters
        ----------
        recipe
        maz_od_df
        access_df
        egress_df
        chooser_attributes
        path_info
        trace_label

        Returns
        -------

        """

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'lookup_tap_tap_utils')

        with chunk.chunk_log(trace_label):

            with memo(
                    "#TVPB CACHE lookup_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df,
                                                    egress_df,
                                                    chooser_attributes,
                                                    trace_label,
                                                    trace=False)
                # note: transit_df index is arbitrary
                chunk.log_df(trace_label, "transit_df", transit_df)

            if TRACE_COMPLEXITY:
                # diagnostic: log the omaz,dmaz pairs with the greatest number of virtual tap-tap paths
                num_paths = transit_df.groupby(['idx']).size().to_frame('n')
                num_paths = pd.merge(maz_od_df,
                                     num_paths,
                                     left_on='idx',
                                     right_index=True)
                num_paths = num_paths[[
                    'omaz', 'dmaz', 'n'
                ]].drop_duplicates(subset=['omaz', 'dmaz'])
                num_paths = num_paths.sort_values(
                    'n', ascending=False).reset_index(drop=True)
                logger.debug(f"num_paths\n{num_paths.head(10)}")

            # FIXME some expressions may want to know access mode -
            locals_dict = path_info.copy()

            # add uid column to transit_df
            with memo("#TVPB lookup_tap_tap_utilities assign uid"):
                attribute_segments = \
                    self.network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attribute_segments')
                scalar_attributes = {
                    k: locals_dict[k]
                    for k in attribute_segments.keys() if k not in transit_df
                }

                transit_df.index = self.uid_calculator.get_unique_ids(
                    transit_df, scalar_attributes)
                transit_df = transit_df[[
                    'idx', 'btap', 'atap'
                ]]  # just needed chooser_columns for uid calculation
                chunk.log_df(trace_label, "transit_df add uid index",
                             transit_df)

            with memo("#TVPB lookup_tap_tap_utilities reindex transit_df"):
                utilities = self.tap_cache.data
                i = 0
                for column_name in self.uid_calculator.set_names:
                    transit_df[column_name] = utilities[
                        transit_df.index.values, i]
                    i += 1

            if ERR_CHECK:
                for c in self.uid_calculator.set_names:
                    assert not transit_df[c].isnull().any()

            chunk.log_df(trace_label, "transit_df", None)

        return transit_df
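
# Sketch (assumed shapes and names) of the cache lookup above: each tap-tap pair gets a stable
# zero-based uid, utilities are precomputed once into a dense array, and later lookups simply
# fancy-index that array by uid instead of re-evaluating the utility expressions.
import numpy as np
import pandas as pd

set_names = ['fast_set', 'cheap_set']            # hypothetical transit path sets
cached_utilities = np.array([[-1.2, -2.0],       # row 0 -> uid 0
                             [-0.8, -1.5],       # row 1 -> uid 1
                             [-3.0, -2.2]])      # row 2 -> uid 2

transit_df = pd.DataFrame({'idx': [7, 7, 8], 'uid': [2, 0, 1]})
transit_df.index = transit_df.pop('uid')         # the uid doubles as the row index

for i, name in enumerate(set_names):
    transit_df[name] = cached_utilities[transit_df.index.values, i]

print(transit_df)   # utilities pulled straight from the cache, no expression evaluation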
Beispiel #31
0
def compute_accessibilities_for_zones(accessibility_df, land_use_df,
                                      assignment_spec, constants, network_los,
                                      trace_od, trace_label):

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d orig zones %d dest zones" %
                (trace_label, orig_zone_count, dest_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()
    chunk.log_df(trace_label, "od_df", od_df)

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }
    locals_d.update(constants)

    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = network_los.tvpb

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d,
                                  trace_rows=trace_od_rows, trace_label=trace_label, chunk_log=True)

    chunk.log_df(trace_label, "results", results)

    # accessibility_df = accessibility_df.copy()
    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)  # (o,d)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(
                f"trace_od not found origin = {trace_orig}, dest = {trace_dest}"
            )
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")

    return accessibility_df
Example #32
0
def compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, skims,
                    trace_label):
    """
    Compute logsums for the tour alt_tdds, which will differ based on their different start, stop
    times of day, which translate to different odt_skim out_period and in_periods.

    In mtctm1, tdds are hourly, but there are only 5 skim time periods, so some of the tdd_alts
    will be the same, once converted to skim time periods. With 5 skim time periods there are
    15 unique out-out period pairs but 190 tdd alternatives.

    For efficiency, rather compute a lot of redundant logsums, we compute logsums for the unique
    (out-period, in-period) pairs and then join them back to the alt_tdds.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    network_los = inject.get_injectable('network_los')

    # - in_period and out_period
    assert 'out_period' not in alt_tdd
    assert 'in_period' not in alt_tdd
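    # map tdd start/end times onto the coarser skim time-period labels; many tdd
    # alternatives collapse to the same (out_period, in_period) pair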
    alt_tdd['out_period'] = network_los.skim_time_period_label(
        alt_tdd['start'])
    alt_tdd['in_period'] = network_los.skim_time_period_label(alt_tdd['end'])
    alt_tdd['duration'] = alt_tdd['end'] - alt_tdd['start']

    # logged outside the chunk_log context because we are extending the log_df call
    # for alt_tdd already made by our only caller, _schedule_tours
    chunk.log_df(trace_label, "alt_tdd", alt_tdd)

    with chunk.chunk_log(trace_label):

        if USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS:
            # compute logsums for all the tour alt_tdds (inefficient)
            logsums = _compute_logsums(alt_tdd, tours_merged, tour_purpose,
                                       model_settings, network_los, skims,
                                       trace_label)
            return logsums

        index_name = alt_tdd.index.name
        deduped_alt_tdds, redupe_columns = dedupe_alt_tdd(
            alt_tdd, tour_purpose, trace_label)
        chunk.log_df(trace_label, "deduped_alt_tdds", deduped_alt_tdds)

        logger.info(
            f"{trace_label} compute_logsums "
            f"deduped_alt_tdds reduced number of rows by "
            f"{round(100 * (len(alt_tdd) - len(deduped_alt_tdds)) / len(alt_tdd), 2)}% "
            f"from {len(alt_tdd)} to {len(deduped_alt_tdds)} compared to USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS"
        )

        t0 = tracing.print_elapsed_time()

        # - compute logsums for the alt_tdd_periods
        deduped_alt_tdds['logsums'] = \
            _compute_logsums(deduped_alt_tdds, tours_merged, tour_purpose,
                             model_settings, network_los, skims, trace_label)

        # tracing.log_runtime(model_name=trace_label, start_time=t0)

        # redupe - join the alt_tdd_period logsums to alt_tdd to get logsums for alt_tdd
        logsums = pd.merge(alt_tdd.reset_index(),
                           deduped_alt_tdds.reset_index(),
                           on=[index_name] + redupe_columns,
                           how='left').set_index(index_name).logsums
        chunk.log_df(trace_label, "logsums", logsums)

        del deduped_alt_tdds
        chunk.log_df(trace_label, "deduped_alt_tdds", None)

        # this is really expensive
        TRACE = False
        if TRACE:
            trace_logsums_df = logsums.to_frame('representative_logsum')
            trace_logsums_df['brute_force_logsum'] = \
                _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, network_los, skims, trace_label)
            tracing.trace_df(trace_logsums_df,
                             label=tracing.extend_trace_label(
                                 trace_label, 'representative_logsums'),
                             slicer='NONE',
                             transpose=False)

    # leave it to our caller to pick up logsums with call to chunk.log_df
    return logsums
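
A minimal standalone sketch of the dedupe/redupe pattern used above (the column names
and values here are made up for illustration, not the actual alt_tdd schema): compute
the expensive quantity once per unique key combination, then merge it back onto every
row of the full table. In compute_logsums the merge also keys on the alt_tdd index, so
deduplication happens per tour; this sketch omits that detail for brevity.

import pandas as pd

# hypothetical alternatives table: several rows share the same period pair
alt = pd.DataFrame({
    'out_period': ['AM', 'AM', 'MD', 'AM', 'MD'],
    'in_period':  ['MD', 'MD', 'PM', 'PM', 'PM'],
})

dedupe_cols = ['out_period', 'in_period']

# compute the expensive quantity only once per unique (out_period, in_period) pair
deduped = alt[dedupe_cols].drop_duplicates().reset_index(drop=True)
deduped['logsums'] = [10.0, 20.0, 30.0]  # stand-in for the expensive _compute_logsums call

# redupe: join the per-pair results back onto every row of the full table
alt = alt.merge(deduped, on=dedupe_cols, how='left')
print(alt)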