Exemple #1
0
def run_trip_scheduling(
        trips,
        tours,
        probs_spec,
        model_settings,
        estimator,
        is_last_iteration,
        chunk_size,
        chunk_tag,
        trace_hh_id,
        trace_label):
    """
    Schedule trips chunk by chunk, handling each chunk's outbound leg before its inbound leg.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trips to schedule; needs an ``outbound`` column usable as a boolean mask.
    tours
        Accepted for interface compatibility; not used in this function.
    probs_spec, model_settings, is_last_iteration, trace_hh_id
        Forwarded unchanged to ``schedule_trips_in_leg``.
    estimator
        Accepted for interface compatibility; not used in this function.
    chunk_size, chunk_tag
        Forwarded to ``chunk.adaptive_chunked_choosers_by_chunk_id``.
    trace_label : str
        Label given to the chunker and to ``chunk.log_df``; the per-leg labels are
        derived from the label the chunker yields for each chunk.

    Returns
    -------
    choices
        ``pd.concat`` of whatever ``schedule_trips_in_leg`` returned for every
        non-empty leg, in processing order.
    """

    chunker = chunk.adaptive_chunked_choosers_by_chunk_id(
        trips, chunk_size, trace_label, chunk_tag)

    leg_results = []
    for _, chunk_trips, chunk_label in chunker:

        for is_outbound_leg, leg_name in ((True, 'outbound'), (False, 'inbound')):

            # rows of this chunk belonging to the current leg direction
            in_leg = chunk_trips.outbound if is_outbound_leg else ~chunk_trips.outbound
            if not in_leg.any():
                continue

            leg_results.append(
                schedule_trips_in_leg(
                    outbound=is_outbound_leg,
                    trips=chunk_trips[in_leg],
                    probs_spec=probs_spec,
                    model_settings=model_settings,
                    is_last_iteration=is_last_iteration,
                    trace_hh_id=trace_hh_id,
                    trace_label=tracing.extend_trace_label(chunk_label, leg_name)))

            # report the growing result list after every leg that produced choices
            chunk.log_df(trace_label, 'result_list', leg_results)

    return pd.concat(leg_results)
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec has a zero probability for
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) choose_most_initial
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    Which option is applied is determined by the FAILFIX model setting

    Parameters
    ----------
    trips
        Table wrapper exposing ``to_frame()``; the resulting frame must have a ``tour_id``
        column plus the columns used by the helpers called here.
    tours
        Table wrapper exposing ``to_frame()``; its index is used to build per-tour chunk ids.
    chunk_size
        Passed to the chunker and to ``run_trip_scheduling``.
    trace_hh_id
        Passed to ``run_trip_scheduling``.

    Returns
    -------
    None
        The result is written back with ``pipeline.replace_table("trips", ...)``, with a
        ``depart`` column added.

    Raises
    ------
    RuntimeError
        If some trips are still unscheduled after all iterations and the FAILFIX setting is
        not FAILFIX_DROP_AND_CLEANUP.
    AssertionError
        If 'DEPART_ALT_BASE' is missing from the model settings, MAX_ITERATIONS is not
        positive, or (in estimation mode) the overridden choices contain nulls.
    """
    trace_label = "trip_scheduling"
    model_settings_file_name = 'trip_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add columns 'tour_hour', 'earliest', 'latest' to trips
    set_tour_hour(trips_df, tours)

    # trip_scheduling is a probabilistic model and we don't support estimation,
    # but we do need to override choices in estimation mode
    estimator = estimation.manager.begin_estimation('trip_scheduling')
    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        chooser_cols_for_estimation = [
            'person_id',
            'household_id',
            'tour_id',
            'trip_num',
            'trip_count',
            'primary_purpose',
            'outbound',
            'earliest',
            'latest',
            'tour_hour',
        ]
        estimator.write_choosers(trips_df[chooser_cols_for_estimation])

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    # add tour-based chunk_id so we can chunk all trips in tour together
    # (each tour gets its positional number in the tours frame as its chunk_id)
    trips_df['chunk_id'] = reindex(
        pd.Series(list(range(len(tours))), tours.index), trips_df.tour_id)

    assert 'DEPART_ALT_BASE' in model_settings
    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    # number of scheduling attempts per chunk; defaults to a single pass
    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []

    for chunk_i, trips_chunk, chunk_trace_label in chunk.adaptive_chunked_choosers_by_chunk_id(
            trips_df, chunk_size, trace_label, trace_label):

        # retry loop: each pass reschedules only the trips whose leg failed in the previous pass
        i = 0
        while (i < max_iterations) and not trips_chunk.empty:

            # only chunk log first iteration since memory use declines with each iteration
            with chunk.chunk_log(
                    trace_label) if i == 0 else chunk.chunk_log_skip():

                i += 1
                is_last_iteration = (i == max_iterations)

                trace_label_i = tracing.extend_trace_label(
                    trace_label, "i%s" % i)
                logger.info("%s scheduling %s trips within chunk %s",
                            trace_label_i, trips_chunk.shape[0], chunk_i)

                choices = \
                    run_trip_scheduling(
                        trips_chunk,
                        tours,
                        probs_spec,
                        model_settings,
                        estimator=estimator,
                        is_last_iteration=is_last_iteration,
                        trace_hh_id=trace_hh_id,
                        chunk_size=chunk_size,
                        chunk_tag=trace_label,
                        trace_label=trace_label_i)

                # boolean series of trips whose individual trip scheduling failed
                # (reindexing to the chunk makes trips without a choice show up as null)
                failed = choices.reindex(trips_chunk.index).isnull()
                logger.info("%s %s failed", trace_label_i, failed.sum())

                if not is_last_iteration:
                    # boolean series of trips whose leg scheduling failed
                    failed_cohorts = failed_trip_cohorts(trips_chunk, failed)
                    # failed cohorts go round again; only the other trips keep their choice
                    trips_chunk = trips_chunk[failed_cohorts]
                    choices = choices[~failed_cohorts]

                # on the last iteration everything returned is kept, nulls included
                choices_list.append(choices)

    # re-read the trips table and align the collected choices to it
    # NOTE(review): presumably this yields a frame without the helper columns added
    # above ('chunk_id' and the set_tour_hour columns) - confirm
    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'trips',
                                              'depart')  # override choices
        estimator.write_override_choices(choices)
        estimator.end_estimation()
        assert not choices.isnull().any()

    if choices.isnull().any():
        # note: `i` here is the iteration count reached by the last chunk processed
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations" %
            (choices.isnull().sum(), trips_df.shape[0], i))

        # unscheduled trips are only tolerated when drop_and_cleanup is the configured fix
        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        # realign choices to the trips that survive the cleanup
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
Exemple #3
0
def run_cdap(persons,
             person_type_map,
             cdap_indiv_spec,
             cdap_interaction_coefficients,
             cdap_fixed_relative_proportions,
             locals_d,
             chunk_size=0,
             trace_hh_id=None,
             trace_label=None):
    """
    Choose individual activity patterns for persons.

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data. Must contain at least a household ID, household size,
        person type category, and age, plus any columns used in cdap_indiv_spec
    person_type_map
        Passed through unchanged to ``_run_cdap``.
        NOTE(review): presumably maps person type names to the codes used in
        ``persons`` - confirm against ``_run_cdap``.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec for individuals without taking any interactions into account.
    cdap_interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    cdap_fixed_relative_proportions : pandas.DataFrame
        Spec for the relative proportions of each activity (M, N, H)
        to choose activities for additional household members not handled by CDAP
    locals_d : Dict
        This is a dictionary of local variables that will be the environment
        for an evaluation of an expression that begins with @
        in either the cdap_indiv_spec or cdap_fixed_relative_proportions expression files
    chunk_size: int
        Chunk size or 0 for no chunking
    trace_hh_id : int
        hh_id to trace or None if no hh tracing
    trace_label : str
        label for tracing or None if no tracing

    Returns
    -------
    choices : pandas.Series
        The ``cdap_activity`` column of the combined per-chunk results
        (activity for each person expressed as 'M', 'N', 'H').

    Raises
    ------
    ValueError
        From ``pd.concat`` if the chunker yields no chunks at all.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'cdap')

    result_list = []
    # run the model one chunk of persons at a time
    for i, persons_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers_by_chunk_id(persons, chunk_size, trace_label):

        chunk_results = \
            _run_cdap(persons_chunk,
                      person_type_map,
                      cdap_indiv_spec,
                      cdap_interaction_coefficients,
                      cdap_fixed_relative_proportions,
                      locals_d,
                      trace_hh_id, chunk_trace_label)

        result_list.append(chunk_results)

        chunk.log_df(trace_label, 'result_list', result_list)

    # FIXME: concatenating will require 2X RAM
    # if necessary, could append to hdf5 store on disk:
    # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
    if len(result_list) == 1:
        # single chunk: reuse it directly and avoid the copy made by concat
        cdap_results = result_list[0]
    else:
        # also covers the empty case, where concat raises a clear ValueError
        # instead of leaving cdap_results unbound
        cdap_results = pd.concat(result_list)

    if trace_hh_id:

        tracing.trace_df(cdap_results,
                         label="cdap",
                         columns=['cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)

    # return choices column as series
    return cdap_results['cdap_activity']
Exemple #4
0
def apply_stage_two_model(omnibus_spec, trips, chunk_size, trace_label):
    """
    Assign a depart time to every trip, choosing stop durations for intermediate trips.

    First and last trips of each leg get their depart directly from the tour
    start/end and the leg durations. The remaining "side" trips are chunked,
    split by direction, and given a stop duration pattern by
    ``choose_tour_leg_pattern``; departs are then the cumulative sum of those
    durations within each (tour_id, outbound) group.

    Parameters
    ----------
    omnibus_spec
        Spec passed to ``get_spec_for_segment`` to obtain the per-direction spec.
    trips : pandas.DataFrame
        Trips with 'trip_num', 'trip_count', 'start', 'end', 'tour_id' and
        'outbound' columns plus the columns named by OUTBOUND, OB_DURATION and
        IB_DURATION. The index is assumed to be unique (results are aligned on it).
        NOTE: when the index is already sorted, helper columns (TRIP_DURATION,
        'depart', 'stop_time_duration') are added to the caller's frame in place;
        otherwise a sorted copy is used.
    chunk_size
        Passed to ``chunk.adaptive_chunked_choosers_by_chunk_id``.
    trace_label : str
        Label passed to the chunker; per-segment labels are derived from the
        label the chunker yields.

    Returns
    -------
    pandas.Series
        Integer depart for each trip, indexed like the (index-sorted) trips.
    """

    # Index.is_monotonic was deprecated in pandas 1.5 and removed in 2.0;
    # is_monotonic_increasing is the same check under its current name.
    if not trips.index.is_monotonic_increasing:
        trips = trips.sort_index()

    # Assign the duration of the appropriate leg to the trip
    trips[TRIP_DURATION] = np.where(trips[OUTBOUND], trips[OB_DURATION], trips[IB_DURATION])

    trips['depart'] = -1

    is_first = trips['trip_num'] == 1
    is_last = trips['trip_count'] == trips['trip_num']
    is_outbound = trips[OUTBOUND]

    # If this is the first outbound trip, the choice is easy, assign the depart time
    # to equal the tour start time.
    trips.loc[is_first & is_outbound, 'depart'] = trips['start']

    # If its the first return leg, it is easy too. Just assign the trip start time to the
    # end time minus the IB duration
    trips.loc[is_first & ~is_outbound, 'depart'] = trips['end'] - trips[IB_DURATION]

    # The last leg of the outbound tour needs to begin at the start plus OB duration
    # (applied after the first-trip rules, so it wins for single-trip legs)
    trips.loc[is_last & is_outbound, 'depart'] = trips['start'] + trips[OB_DURATION]

    # The last leg of the inbound tour needs to begin at the end time of the tour
    trips.loc[is_last & ~is_outbound, 'depart'] = trips['end']

    # Slice off the remaining trips with intermediate stops to deal with.
    # Hopefully, with the tricks above we've sliced off a lot of choices.
    # This slice only includes trips that are neither the first nor the last of their leg.
    side_trips = trips[~is_first & ~is_last]

    # No processing needs to be done because we have simple trips / tours
    if side_trips.empty:
        assert trips['depart'].notnull().all()
        return trips['depart'].astype(int)

    # Get the potential time windows
    time_windows = get_time_windows(side_trips[TRIP_DURATION].max(), side_trips[TRIP_COUNT].max() - 1)

    trip_list = []

    for i, chooser_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers_by_chunk_id(side_trips, chunk_size, trace_label):

        for is_outbound_segment, trip_segment in chooser_chunk.groupby(OUTBOUND):
            direction = OUTBOUND if is_outbound_segment else 'inbound'
            spec = get_spec_for_segment(omnibus_spec, direction)
            segment_trace_label = '{}_{}'.format(direction, chunk_trace_label)

            patterns = build_patterns(trip_segment, time_windows)

            choices = choose_tour_leg_pattern(trip_segment,
                                              patterns, spec, trace_label=segment_trace_label)

            # attach the chosen pattern's per-trip stop durations
            choices = pd.merge(choices.reset_index(), patterns.reset_index(),
                               on=[TOUR_LEG_ID, PATTERN_ID], how='left')

            choices = choices[['trip_id', 'stop_time_duration']].copy()

            trip_list.append(choices)

    trip_list = pd.concat(trip_list, sort=True).set_index('trip_id')
    trips['stop_time_duration'] = 0
    trips.update(trip_list)

    # seed each leg's running total with the depart of its first trip
    trips.loc[is_first, 'stop_time_duration'] = trips['depart']

    # Accumulate in trip_num order within each leg. sort_values returns a new
    # frame, so the cumulative sum is taken on the sorted copy and assigned back
    # by index, leaving the row order of `trips` unchanged.
    ordered = trips.sort_values(['tour_id', 'outbound', 'trip_num'])
    trips['stop_time_duration'] = \
        ordered.groupby(['tour_id', 'outbound'])['stop_time_duration'].cumsum()

    # last trips keep the depart assigned above; all others take the running total
    trips.loc[~is_last, 'depart'] = trips['stop_time_duration']
    return trips['depart'].astype(int)