Example #1
def trip_destination(trips, tours_merged, chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound
    trips, and home for inbound trips).
    """
    trace_label = 'trip_destination'

    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get(
        'fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    estimator = estimation.manager.begin_estimation('trip_destination')

    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms',
                              append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse',
                              append=False)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so the trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination',
                      False) and not trips_df.failed.any():
        if (trips_df.trip_num < trips_df.trip_count).sum() == 0:
            raise RuntimeError(
                "can't honor 'testing_fail_trip_destination' setting because no intermediate trips"
            )

        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label,
                       trips_df.failed.sum())
        if inject.get_injectable('pipeline_file_prefix', None):
            file_name = f"{trace_label}_failed_trips_{inject.get_injectable('pipeline_file_prefix')}"
        else:
            file_name = f"{trace_label}_failed_trips"
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

    if estimator:
        estimator.end_estimation()
        # no trips should have failed since we overwrite choices and the sample should have no failed trips
        assert not trips_df.failed.any()

    if CLEANUP:

        if trips_df.failed.any():
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed],
                                    level='trip_id',
                                    inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be None if want_sample_table but there are no intermediate trips
        # expect samples only for intermediate trip destinations

        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
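
The testing hook above only fails 'intermediate' trips (trip_num < trip_count), since final trips already have fixed destinations. A minimal, self-contained pandas sketch of that flagging rule, using made-up data rather than a real pipeline table:

import pandas as pd

# Toy trips table: trip_count is the number of legs in the trip's direction,
# so rows with trip_num < trip_count are 'intermediate' trips.
trips = pd.DataFrame({
    'trip_num':   [1, 2, 3, 1, 2],
    'trip_count': [3, 3, 3, 2, 2],
    'origin':     [10, 20, 30, 20, 40],
}, index=pd.Index([1, 2, 3, 4, 5], name='trip_id'))

intermediate = trips.trip_num < trips.trip_count

# Same rule as the 'testing_fail_trip_destination' hook: fail every
# intermediate trip departing from the highest intermediate origin.
fail_o = trips[intermediate].origin.max()
trips['failed'] = (trips.origin == fail_o) & intermediate

print(trips.failed.tolist())  # [False, True, False, True, False]
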
Example #2
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings(
        'trip_purpose_and_destination.yaml')

    # for consistency, read sample_table_name setting from trip_destination settings file
    trip_destination_model_settings = config.read_model_settings(
        'trip_destination.yaml')
    sample_table_name = trip_destination_model_settings.get(
        'DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert MAX_ITERATIONS > 0

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if 'failed' not in trips_df.columns:
            # trip_destination model cleaned up any failed trips
            logger.info("%s - no failed column from prior model run." %
                        trace_label)
            return

        elif not trips_df.failed.any():
            # 'failed' column but no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." %
                        trace_label)
            trips_df.drop(columns='failed', inplace=True)
            pipeline.replace_table("trips", trips_df)
            return

        else:
            logger.info(
                "trip_destination has already been run. Rerunning failed trips"
            )
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
                trips_df.tour_id)]
            logger.info("Rerunning %s failed trips and leg-mates" %
                        trips_df.shape[0])

            # drop any previously saved samples of failed trips
            if want_sample_table and pipeline.is_table(sample_table_name):
                logger.info(
                    "Dropping any previously saved samples of failed trips")
                save_sample_df = pipeline.get_table(sample_table_name)
                save_sample_df.drop(trips_df.index,
                                    level='trip_id',
                                    inplace=True)
                pipeline.replace_table(sample_table_name, save_sample_df)
                del save_sample_df

    # if we estimated trip_destination, there should have been no failed trips
    # if we didn't, but it is enabled, it is probably a configuration error
    # if we just estimated trip_purpose, it isn't clear what they are trying to do, nor how to handle it
    assert not (estimation.manager.begin_estimation('trip_purpose')
                or estimation.manager.begin_estimation('trip_destination'))

    processed_trips = []
    save_samples = []
    i = 0
    TRIP_RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in TRIP_RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df, save_sample_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        # if testing, make sure at least one trip fails
        if config.setting('testing_fail_trip_destination', False) \
                and (i == 1) and not trips_df.failed.any():
            fail_o = trips_df[
                trips_df.trip_num < trips_df.trip_count].origin.max()
            trips_df.failed = (trips_df.origin == fail_o) & \
                              (trips_df.trip_num < trips_df.trip_count)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_samples.append(save_sample_df)
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

        # if max iterations reached, add remaining trips to processed_trips and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_sample_df.drop(trips_df[trips_df.failed].index,
                                    level='trip_id',
                                    inplace=True)
                save_samples.append(save_sample_df)
            break

        # otherwise, if any trips failed, then their leg-mate trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to processed_trips
        processed_trips.append(trips_df[~trips_df.failed][TRIP_RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
            trips_df.tour_id)]

        # add samples for the processed trips to save_samples
        if save_sample_df is not None:
            # drop failed trip samples
            save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
            save_samples.append(save_sample_df)

    # - assign result columns to trips
    processed_trips = pd.concat(processed_trips)

    if len(save_samples) > 0:
        save_sample_df = pd.concat(save_samples)
        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))
        pipeline.extend_table(sample_table_name, save_sample_df)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, processed_trips.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, processed_trips)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    # check to make sure we wrote the sample file if requested
    if want_sample_table and len(trips_df) > 0:
        assert pipeline.is_table(sample_table_name)
        # since we saved samples for all successful trips,
        # once we discard failed trips we should have samples for all trips
        save_sample_df = pipeline.get_table(sample_table_name)
        # expect samples only for intermediate trip destinations
        assert \
            len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])
        del save_sample_df

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
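
The loop above is a bank-and-retry pattern: run the model, keep the trips that succeeded, and re-run only the failed ones until none remain or MAX_ITERATIONS is reached. A stripped-down sketch of that control flow; run_once here is a stand-in for run_trip_purpose_and_destination, not an ActivitySim API:

import pandas as pd

MAX_ITERATIONS = 5

def run_once(trips, i):
    # Stand-in model step: odd trip_ids fail on the first pass only.
    out = trips.copy()
    out['failed'] = (out.index % 2 == 1) & (i == 1)
    return out

trips = pd.DataFrame(index=pd.RangeIndex(1, 7, name='trip_id'))
processed = []
i = 0
while True:
    i += 1
    trips = run_once(trips, i)
    if not trips.failed.any() or i >= MAX_ITERATIONS:
        processed.append(trips)  # done (or giving up): keep everything
        break
    processed.append(trips[~trips.failed])              # bank the good trips
    trips = trips[trips.failed].drop(columns='failed')  # retry the rest

result = pd.concat(processed)
print(result.failed.sum(), 'failed trips after', i, 'iterations')
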
Example #3
def parking_location(trips, trips_merged, land_use, network_los, chunk_size,
                     trace_hh_id):
    """
    Given a set of trips, each trip needs to have a parking location if
    it is eligible for remote parking.
    """

    trace_label = 'parking_location'
    model_settings = config.read_model_settings('parking_location_choice.yaml')
    alt_destination_col_name = model_settings['ALT_DEST_COL_NAME']

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    trips_df = trips.to_frame()
    trips_merged_df = trips_merged.to_frame()
    land_use_df = land_use.to_frame()

    locals_dict = {'network_los': network_los}
    locals_dict.update(config.get_model_constants(model_settings))

    if preprocessor_settings:
        expressions.assign_columns(df=trips_merged_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    parking_locations, save_sample_df = run_parking_destination(
        model_settings,
        trips_merged_df,
        land_use_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
    )

    assign_in_place(trips_df,
                    parking_locations.to_frame(alt_destination_col_name))

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get(
            'PARKING_LOCATION_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
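
parking_locations comes back as a Series indexed by trip_id, so .to_frame(alt_destination_col_name) turns it into a one-column frame before assign_in_place merges it into trips_df. Roughly, assign_in_place aligns on the index and adds or overwrites columns; the effect can be sketched with plain pandas (the 'parking_zone' column name is illustrative):

import pandas as pd

trips_df = pd.DataFrame(
    {'depart': [8, 9, 17]},
    index=pd.Index([101, 102, 103], name='trip_id'))

# Choices exist only for trips eligible for remote parking.
parking_locations = pd.Series(
    [55, 72], index=pd.Index([101, 103], name='trip_id'))

# Align on the index; reindexing over the full trips index
# leaves ineligible trips as NaN.
choices = parking_locations.to_frame('parking_zone')
trips_df[choices.columns] = choices.reindex(trips_df.index)

print(trips_df)
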
Example #4
def trip_destination(trips, tours_merged, chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound
    trips, and home for inbound trips).
    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get('fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so the trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination', False) and not trips_df.failed.any():
        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:

        if trips_df.failed.any():
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed], level='trip_id', inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be None if want_sample_table but there are no intermediate trips
        # expect samples only for intermediate trip destinations

        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
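
The assertion above depends on save_sample_df carrying a MultiIndex whose first level is trip_id, with one group of sampled alternatives per intermediate trip. A small illustration of what that check compares, on made-up data:

import pandas as pd

# Sampled destination alternatives: several rows per intermediate trip.
save_sample_df = pd.DataFrame(
    {'prob': [0.4, 0.6, 0.3, 0.7]},
    index=pd.MultiIndex.from_tuples(
        [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'c')],
        names=['trip_id', 'alt_dest']))

trips_df = pd.DataFrame({
    'trip_num':   [1, 2, 3],
    'trip_count': [3, 3, 3],
}, index=pd.Index([1, 2, 3], name='trip_id'))

# One sample group per intermediate trip: the final trip (trip_id 3)
# already has a destination and gets no samples.
n_sampled = len(save_sample_df.index.get_level_values(0).unique())
n_intermediate = len(trips_df[trips_df.trip_num < trips_df.trip_count])
assert n_sampled == n_intermediate == 2
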
Example #5
def iterate_location_choice(model_settings, persons_merged, persons,
                            households, network_los, estimator, chunk_size,
                            trace_hh_id, locutor, trace_label):
    """
    iterate run_location_choice updating shadow pricing until convergence criteria satisfied
    or max_iterations reached.

    (If use_shadow_pricing not enabled, then just iterate once)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    households : injected table
    network_los : los.Network_LOS
    estimator : Estimator or None
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
    adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
    adds annotations to persons table
    """

    chunk_tag = trace_label

    # boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged_df = persons_merged.to_frame()

    persons_merged_df = persons_merged_df[
        persons_merged_df[chooser_filter_column]]

    persons_merged_df.sort_index(
        inplace=True
    )  # interaction_sample expects chooser index to be monotonic increasing

    # chooser segmentation allows different sets of coefficients for e.g. different income_segments or tour_types
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    assert chooser_segment_column in persons_merged_df, \
        f"CHOOSER_SEGMENT_COLUMN '{chooser_segment_column}' not in persons_merged table."

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations
    assert not (spc.use_shadow_pricing and estimator)

    logger.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices_df, save_sample_df = run_location_choice(
            persons_merged_df,
            network_los,
            shadow_price_calculator=spc,
            want_logsums=logsum_column_name is not None,
            want_sample_table=want_sample_table,
            estimator=estimator,
            model_settings=model_settings,
            chunk_size=chunk_size,
            chunk_tag=chunk_tag,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label,
                                                   'i%s' % iteration))

        # choices_df is a pandas DataFrame with columns 'choice' and (optionally) 'logsum'
        if choices_df is None:
            break

        spc.set_choices(
            choices=choices_df['choice'],
            segment_ids=persons_merged_df[chooser_segment_column].reindex(
                choices_df.index))

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logger.info("%s converged after iteration %s" %
                        (trace_label, iteration))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'],
                             spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'],
                             spc.modeled_size)

    persons_df = persons.to_frame()

    # add the choice values to the dest_choice_column in the persons dataframe.
    # We only chose locations for the subset of persons who are choosers
    # (e.g. school locations only for persons who go to school), so we
    # backfill the missing choices with -1 to code 'no location chosen'
    NO_DEST_ZONE = -1
    persons_df[dest_choice_column_name] = \
        choices_df['choice'].reindex(persons_df.index).fillna(NO_DEST_ZONE).astype(int)

    # add the dest_choice_logsum column to persons dataframe
    if logsum_column_name:
        persons_df[logsum_column_name] = \
            choices_df['logsum'].reindex(persons_df.index).astype('float')

    if save_sample_df is not None:
        # might be None for tiny samples even if sample_table_name was specified
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(
            choices_df)
        # lest they try to put school and workplace samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("dest choice sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label,
                                                   'annotate_persons'))

        pipeline.replace_table("persons", persons_df)

        if trace_hh_id:
            tracing.trace_df(persons_df, label=trace_label, warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label,
                                                   'annotate_households'))
        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    if logsum_column_name:
        tracing.print_summary(logsum_column_name,
                              choices_df['logsum'],
                              value_counts=True)

    return persons_df
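
Only persons who pass the chooser filter receive a choice, so the reindex over the full persons index leaves everyone else as NaN, which is then coded as NO_DEST_ZONE (-1). A minimal sketch of that backfill; the 'school_zone_id' column name is illustrative:

import pandas as pd

NO_DEST_ZONE = -1

persons_df = pd.DataFrame(
    {'is_student': [True, False, True]},
    index=pd.Index([1, 2, 3], name='person_id'))

# Choices exist only for the filtered choosers (persons 1 and 3).
choices = pd.Series([12, 34], index=pd.Index([1, 3], name='person_id'))

# Person 2 was never a chooser: reindex yields NaN, coded as -1.
persons_df['school_zone_id'] = (
    choices.reindex(persons_df.index).fillna(NO_DEST_ZONE).astype(int))

print(persons_df.school_zone_id.tolist())  # [12, -1, 34]
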