Example #1
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips).
    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id,
        trace_label)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:
        trips_df = cleanup_failed_trips(trips_df)
    elif trips_df.failed.any():
        logger.warning("%s keeping %s sidelined failed trips" %
                       (trace_label, trips_df.failed.sum()))

    pipeline.replace_table("trips", trips_df)

    print("trips_df\n", trips_df.shape)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
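
cleanup_failed_trips is imported from ActivitySim's trip utilities and is not shown in these examples. The following is a minimal sketch of the idea, assuming a leg is keyed by (tour_id, outbound); the body is illustrative, not the library implementation:

import pandas as pd

def cleanup_failed_trips_sketch(trips_df):
    # illustrative only: drop failed trips, then renumber trip_num and
    # trip_count on each (tour_id, outbound) leg so survivors stay consistent
    ok = trips_df[~trips_df.failed].copy()
    ok['trip_num'] = ok.groupby(['tour_id', 'outbound']).cumcount() + 1
    ok['trip_count'] = ok.groupby(['tour_id', 'outbound'])['trip_num'].transform('max')
    return ok

trips = pd.DataFrame({
    'tour_id': [1, 1, 1, 1],
    'outbound': [True, True, False, False],
    'trip_num': [1, 2, 1, 2],
    'trip_count': [2, 2, 2, 2],
    'failed': [False, True, False, False],
}, index=pd.Index([101, 102, 103, 104], name='trip_id'))

print(cleanup_failed_trips_sketch(trips))  # trip 102 dropped, its leg renumbered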
Example #2
def trip_destination(trips, tours_merged, chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips).
    """
    trace_label = 'trip_destination'

    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get(
        'fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    estimator = estimation.manager.begin_estimation('trip_destination')

    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms',
                              append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse',
                              append=False)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so the trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination',
                      False) and not trips_df.failed.any():
        if (trips_df.trip_num < trips_df.trip_count).sum() == 0:
            raise RuntimeError(
                "can't honor 'testing_fail_trip_destination' setting because there are no intermediate trips"
            )

        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label,
                       trips_df.failed.sum())
        if inject.get_injectable('pipeline_file_prefix', None):
            file_name = f"{trace_label}_failed_trips_{inject.get_injectable('pipeline_file_prefix')}"
        else:
            file_name = f"{trace_label}_failed_trips"
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

    if estimator:
        estimator.end_estimation()
        # no trips should have failed since we overwrite choices and the sample should have no failed trips
        assert not trips_df.failed.any()

    if CLEANUP:

        if trips_df.failed.any():
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed],
                                    level='trip_id',
                                    inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be None if want_sample_table is set but there are no intermediate trips
        # expect samples only for intermediate trip destinations

        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
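
Several of these examples prune the destination-choice sample table with save_sample_df.drop(..., level='trip_id'). A small self-contained illustration of that pandas idiom, using a made-up table layout:

import pandas as pd

# toy sample table: one row per (trip_id, alt_dest) pair
idx = pd.MultiIndex.from_tuples(
    [(101, 5), (101, 7), (102, 5), (103, 9)],
    names=['trip_id', 'alt_dest'])
sample = pd.DataFrame({'prob': [0.6, 0.4, 1.0, 1.0]}, index=idx)

# dropping with level='trip_id' removes every sampled alternative
# for the listed trips, which is how failed trips are purged above
sample = sample.drop([101], level='trip_id')
print(sample)  # rows for trips 102 and 103 remain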
Example #3
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec has a zero probability for
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) choose_most_initial
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach.

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    Which option is applied is determined by the FAILFIX model setting.

    """
    trace_label = "trip_scheduling"
    model_settings_file_name = 'trip_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add columns 'tour_hour', 'earliest', 'latest' to trips
    set_tour_hour(trips_df, tours)

    # trip_scheduling is a probabilistic model and we don't support estimation,
    # but we do need to override choices in estimation mode
    estimator = estimation.manager.begin_estimation('trip_scheduling')
    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        chooser_cols_for_estimation = [
            'person_id',
            'household_id',
            'tour_id',
            'trip_num',
            'trip_count',
            'primary_purpose',
            'outbound',
            'earliest',
            'latest',
            'tour_hour',
        ]
        estimator.write_choosers(trips_df[chooser_cols_for_estimation])

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = reindex(
        pd.Series(list(range(len(tours))), tours.index), trips_df.tour_id)

    assert 'DEPART_ALT_BASE' in model_settings
    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []

    for chunk_i, trips_chunk, chunk_trace_label in chunk.adaptive_chunked_choosers_by_chunk_id(
            trips_df, chunk_size, trace_label, trace_label):

        i = 0
        while (i < max_iterations) and not trips_chunk.empty:

            # only chunk log first iteration since memory use declines with each iteration
            with chunk.chunk_log(
                    trace_label) if i == 0 else chunk.chunk_log_skip():

                i += 1
                is_last_iteration = (i == max_iterations)

                trace_label_i = tracing.extend_trace_label(
                    trace_label, "i%s" % i)
                logger.info("%s scheduling %s trips within chunk %s",
                            trace_label_i, trips_chunk.shape[0], chunk_i)

                choices = \
                    run_trip_scheduling(
                        trips_chunk,
                        tours,
                        probs_spec,
                        model_settings,
                        estimator=estimator,
                        is_last_iteration=is_last_iteration,
                        trace_hh_id=trace_hh_id,
                        chunk_size=chunk_size,
                        chunk_tag=trace_label,
                        trace_label=trace_label_i)

                # boolean series of trips whose individual trip scheduling failed
                failed = choices.reindex(trips_chunk.index).isnull()
                logger.info("%s %s failed", trace_label_i, failed.sum())

                if not is_last_iteration:
                    # boolean series of trips whose leg scheduling failed
                    failed_cohorts = failed_trip_cohorts(trips_chunk, failed)
                    trips_chunk = trips_chunk[failed_cohorts]
                    choices = choices[~failed_cohorts]

                choices_list.append(choices)

    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'trips',
                                              'depart')  # override choices
        estimator.write_override_choices(choices)
        estimator.end_estimation()
        assert not choices.isnull().any()

    if choices.isnull().any():
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations" %
            (choices.isnull().sum(), trips_df.shape[0], i))

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
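
The failure mode described in the docstring, a depart window whose clipped probabilities sum to zero, can be shown in isolation. A hedged sketch with a made-up probability row indexed by depart hour (the real spec handling inside run_trip_scheduling is considerably more involved):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)

def choose_depart(probs_row, earliest, latest):
    # sketch only: zero out depart periods outside the allowed window,
    # then sample; a window that sums to zero means the trip fails (None),
    # producing the NaNs that the reindex()/isnull() checks above detect
    window = probs_row.copy()
    window[(window.index < earliest) | (window.index > latest)] = 0.0
    total = window.sum()
    if total == 0:
        return None
    return rng.choice(window.index, p=window / total)

# hypothetical probability row for depart hours 5..10
probs = pd.Series([0.0, 0.2, 0.5, 0.3, 0.0, 0.0], index=range(5, 11))
print(choose_depart(probs, earliest=7, latest=8))   # draws 7 or 8
print(choose_depart(probs, earliest=9, latest=10))  # None -> failed trip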
Example #4
def trip_purpose_and_destination(
        trips,
        tours_merged,
        chunk_size,
        trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info('trip_destination has already been run. Rerunning failed trips')
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" % (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
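
flag_failed_trip_leg_mates, called throughout these examples, is also imported from ActivitySim. A minimal stand-in, assuming a leg is identified by (tour_id, outbound) and that flagging means marking every trip on a leg that contains a failed trip:

import pandas as pd

def flag_failed_trip_leg_mates_sketch(trips_df, col):
    # if any trip on a (tour_id, outbound) leg failed, the whole leg
    # is unusable, so flag its leg-mates as well
    trips_df[col] = trips_df.groupby(['tour_id', 'outbound'])[col].transform('any')

trips = pd.DataFrame({
    'tour_id': [1, 1, 1],
    'outbound': [True, True, False],
    'failed': [False, True, False],
})
flag_failed_trip_leg_mates_sketch(trips, 'failed')
print(trips.failed.tolist())  # [True, True, False]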
Example #5
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings(
        'trip_purpose_and_destination.yaml')

    # for consistency, read sample_table_name setting from trip_destination settings file
    trip_destination_model_settings = config.read_model_settings(
        'trip_destination.yaml')
    sample_table_name = trip_destination_model_settings.get(
        'DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if 'failed' not in trips_df.columns:
            # trip_destination model cleaned up any failed trips
            logger.info("%s - no failed column from prior model run." %
                        trace_label)
            return

        elif not trips_df.failed.any():
            # 'failed' column but no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." %
                        trace_label)
            trips_df.drop(columns='failed', inplace=True)
            pipeline.replace_table("trips", trips_df)
            return

        else:
            logger.info(
                "trip_destination has already been run. Rerunning failed trips"
            )
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
                trips_df.tour_id)]
            logger.info("Rerunning %s failed trips and leg-mates" %
                        trips_df.shape[0])

            # drop any previously saved samples of failed trips
            if want_sample_table and pipeline.is_table(sample_table_name):
                logger.info(
                    "Dropping any previously saved samples of failed trips")
                save_sample_df = pipeline.get_table(sample_table_name)
                save_sample_df.drop(trips_df.index,
                                    level='trip_id',
                                    inplace=True)
                pipeline.replace_table(sample_table_name, save_sample_df)
                del save_sample_df

    # if we estimated trip_destination, there should have been no failed trips
    # if we didn't, but it is enabled, it is probably a configuration error
    # if we just estimated trip_purpose, it isn't clear what they are trying to do, nor how to handle it
    assert not (estimation.manager.begin_estimation('trip_purpose')
                or estimation.manager.begin_estimation('trip_destination'))

    processed_trips = []
    save_samples = []
    i = 0
    TRIP_RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in TRIP_RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df, save_sample_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        # if testing, make sure at least one trip fails
        if config.setting('testing_fail_trip_destination', False) \
                and (i == 1) and not trips_df.failed.any():
            fail_o = trips_df[
                trips_df.trip_num < trips_df.trip_count].origin.max()
            trips_df.failed = (trips_df.origin == fail_o) & \
                              (trips_df.trip_num < trips_df.trip_count)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_samples.append(save_sample_df)
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

        # if max iterations reached, add remaining trips to processed_trips and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_sample_df.drop(trips_df[trips_df.failed].index,
                                    level='trip_id',
                                    inplace=True)
                save_samples.append(save_sample_df)
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to processed_trips
        processed_trips.append(trips_df[~trips_df.failed][TRIP_RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
            trips_df.tour_id)]

        # add the samples for the successfully processed trips to save_samples
        if save_sample_df is not None:
            # drop failed trip samples
            save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
            save_samples.append(save_sample_df)

    # - assign result columns to trips
    processed_trips = pd.concat(processed_trips)

    if len(save_samples) > 0:
        save_sample_df = pd.concat(save_samples)
        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))
        pipeline.extend_table(sample_table_name, save_sample_df)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, processed_trips.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, processed_trips)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    # check to make sure we wrote the sample table if requested
    if want_sample_table and len(trips_df) > 0:
        assert pipeline.is_table(sample_table_name)
        # since we saved samples for all successful trips,
        # once failed trips are discarded we should have samples for all remaining trips
        save_sample_df = pipeline.get_table(sample_table_name)
        # expect samples only for intermediate trip destinations
        assert \
            len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])
        del save_sample_df

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
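
The closing assertion above compares the unique trip_ids in the sample table against the number of intermediate trips (trip_num < trip_count). The same check in miniature, with hypothetical tables:

import pandas as pd

trips = pd.DataFrame({
    'trip_num': [1, 2, 1],
    'trip_count': [2, 2, 1],
}, index=pd.Index([101, 102, 103], name='trip_id'))

idx = pd.MultiIndex.from_tuples([(101, 5), (101, 7)],
                                names=['trip_id', 'alt_dest'])
save_sample_df = pd.DataFrame({'prob': [0.6, 0.4]}, index=idx)

# only trip 101 is intermediate, and only it appears in the sample table
n_sampled = len(save_sample_df.index.get_level_values('trip_id').unique())
n_intermediate = (trips.trip_num < trips.trip_count).sum()
assert n_sampled == n_intermediate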
Example #6
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips).
    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get('fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so the trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination', False) and not trips_df.failed.any():
        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:

        if trips_df.failed.any():
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed], level='trip_id', inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be None if want_sample_table is set but there are no intermediate trips
        # expect samples only for intermediate trip destinations

        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
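
The testing_fail_trip_destination hook deterministically fails every intermediate trip departing from the largest origin found among intermediate trips. A toy run of that selection logic:

import pandas as pd

trips_df = pd.DataFrame({
    'origin': [3, 9, 9, 4],
    'trip_num': [1, 2, 1, 2],
    'trip_count': [2, 2, 2, 2],
})

intermediate = trips_df.trip_num < trips_df.trip_count
fail_o = trips_df[intermediate].origin.max()          # 9
trips_df['failed'] = (trips_df.origin == fail_o) & intermediate
print(trips_df.failed.tolist())  # [False, False, True, False]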
Example #7
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings(
        'trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info(
                'trip_destination has already been run. Rerunning failed trips'
            )
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
                trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' %
                        trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." %
                        trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
            trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
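
assign_in_place merges the accumulated result columns back onto the full trips table, aligned on the shared index. A simplified stand-in for the ActivitySim helper; this shows only the alignment idea, and the real helper may treat dtypes and missing columns differently:

import pandas as pd

def assign_in_place_sketch(df, other):
    # overwrite (or add) other's columns on df, aligned on the shared index
    for c in other.columns:
        df.loc[other.index, c] = other[c]

trips = pd.DataFrame({'purpose': ['work', 'shop']},
                     index=pd.Index([1, 2], name='trip_id'))
results = pd.DataFrame({'purpose': ['eat'], 'destination': [42]},
                       index=pd.Index([2], name='trip_id'))
assign_in_place_sketch(trips, results)
print(trips)  # trip 2 now has purpose 'eat' and destination 42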
Example #8
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec has a zero probability for
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) CHOOSE_MOST_INITIAL
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach.

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    For now we are choosing among these approaches with a manifest constant, but this could
    be made a model setting...

    """
    trace_label = "trip_scheduling"

    model_settings = config.read_model_settings('trip_scheduling.yaml')
    assert 'DEPART_ALT_BASE' in model_settings

    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = \
        reindex(pd.Series(list(range(tours.shape[0])), tours.index), trips_df.tour_id)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []
    i = 0
    while (i < max_iterations) and not trips_df.empty:

        i += 1
        last_iteration = (i == max_iterations)

        trace_label_i = tracing.extend_trace_label(trace_label, "i%s" % i)
        logger.info("%s scheduling %s trips", trace_label_i, trips_df.shape[0])

        choices = \
            run_trip_scheduling(
                trips_df,
                tours,
                probs_spec,
                model_settings,
                last_iteration=last_iteration,
                trace_hh_id=trace_hh_id,
                chunk_size=chunk_size,
                trace_label=trace_label_i)

        # boolean series of trips whose individual trip scheduling failed
        failed = choices.reindex(trips_df.index).isnull()
        logger.info("%s %s failed", trace_label_i, failed.sum())

        if not last_iteration:
            # boolean series of trips whose leg scheduling failed
            failed_cohorts = failed_trip_cohorts(trips_df, failed)
            trips_df = trips_df[failed_cohorts]
            choices = choices[~failed_cohorts]

        choices_list.append(choices)

    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)
    if choices.isnull().any():
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations" %
            (choices.isnull().sum(), trips_df.shape[0], i))

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
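
The chunk_id assignment near the top of this example uses ActivitySim's reindex helper to broadcast a per-tour value onto trips. A rough plain-pandas equivalent, offered as a sketch rather than the library code:

import pandas as pd

def reindex_sketch(series, index):
    # look up series values for each element of index; roughly what
    # the reindex helper used above does
    return series.reindex(index).to_numpy()

tours = pd.DataFrame(index=pd.Index([11, 12], name='tour_id'))
trips = pd.DataFrame({'tour_id': [11, 11, 12]},
                     index=pd.Index([1, 2, 3], name='trip_id'))

chunk_ids = pd.Series(range(len(tours)), index=tours.index)
trips['chunk_id'] = reindex_sketch(chunk_ids, trips.tour_id)
print(trips.chunk_id.tolist())  # [0, 0, 1]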