Example #1
def create_mandatory_tours():

    # FIXME - move this to body?

    persons = inject.get_table('persons')
    configs_dir = inject.get_injectable('configs_dir')

    persons = persons.to_frame(columns=[
        "mandatory_tour_frequency", "is_worker", "school_taz", "workplace_taz"
    ])
    persons = persons[~persons.mandatory_tour_frequency.isnull()]

    tour_frequency_alternatives = inject.get_injectable(
        'mandatory_tour_frequency_alternatives')

    tours = process_mandatory_tours(persons, tour_frequency_alternatives)

    expressions.assign_columns(df=tours,
                               model_settings='annotate_tours_with_dest',
                               configs_dir=configs_dir,
                               trace_label='create_mandatory_tours')

    pipeline.extend_table("tours", tours)
    tracing.register_traceable_table('tours', tours)
    pipeline.get_rn_generator().add_channel(tours, 'tours')
Example #2
def create_non_mandatory_tours():
    """
    We have now generated non-mandatory tours, but they are attributes of the
    person table. Now we create a "tours" table which has one row per tour
    that has been generated (and the person id it is associated with).
    """

    persons = inject.get_table('persons')
    alts = inject.get_injectable('non_mandatory_tour_frequency_alts')

    df = process_non_mandatory_tours(
        persons.non_mandatory_tour_frequency.dropna(),
        alts
    )

    pipeline.extend_table("tours", df)
    tracing.register_traceable_table('tours', df)
    pipeline.get_rn_generator().add_channel(df, 'tours')
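
(Note: the docstring above describes expanding per-person frequency choices into a table with one row per tour. The sketch below illustrates that expansion in plain pandas, with toy alternative ids and tour-type columns; it is not the actual process_non_mandatory_tours implementation.)

import pandas as pd

# toy frequency alternatives: one row per alternative, one column per tour type
alts = pd.DataFrame({'escort': [0, 1, 2], 'shopping': [1, 0, 1]},
                    index=[0, 1, 2])  # index = alternative id

# toy per-person choices (person_id -> chosen alternative id)
choices = pd.Series([1, 2], index=pd.Index([10, 11], name='person_id'))

# look up each person's tour counts, then repeat to get one row per tour
tour_counts = alts.loc[choices.values].set_index(choices.index)
stacked = tour_counts.stack()
stacked = stacked[stacked > 0]
tours = stacked.index.repeat(stacked.values).to_frame(index=False)
tours.columns = ['person_id', 'tour_type']
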
Example #3
def create_non_mandatory_tours(trace_hh_id):
    """
    We have now generated non-mandatory tours, but they are attributes of the
    person table. Now we create a "tours" table which has one row per tour
    that has been generated (and the person id it is associated with).
    """

    persons = inject.get_table('persons')
    alts = inject.get_injectable('non_mandatory_tour_frequency_alts')

    non_mandatory_tours = process_non_mandatory_tours(
        persons.non_mandatory_tour_frequency.dropna(),
        alts
    )

    tours = pipeline.extend_table("tours", non_mandatory_tours)
    tracing.register_traceable_table('tours', tours)
    pipeline.get_rn_generator().add_channel(non_mandatory_tours, 'tours')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)
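
(Note: the extend_table / register_traceable_table / add_channel triad above recurs throughout these examples: newly created tour rows are appended to the checkpointed "tours" table, registered for household-level tracing, and given a reproducible random-number channel. Conceptually, the table-extension step behaves like the simplified sketch below; the real pipeline.extend_table also handles checkpointing and column alignment.)

import pandas as pd

def extend_table_sketch(existing_df, new_rows_df):
    # append newly created rows (e.g. new tours) to the stored table;
    # simplified stand-in for pipeline.extend_table
    return pd.concat([existing_df, new_rows_df])
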
Example #4
def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'

    model_settings = config.read_model_settings(
        'atwork_subtour_frequency.yaml')
    model_spec = simulate.read_model_spec(
        file_name='atwork_subtour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('atwork_subtour_frequency_alternatives.csv'),
        set_index='alt')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return

    # merge persons into work_tours
    work_tours = pd.merge(work_tours,
                          persons_merged,
                          left_on='person_id',
                          right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours",
                len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        assign_columns(df=work_tours,
                       model_settings=preprocessor_settings,
                       trace_label=trace_label)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    tracing.print_summary('atwork_subtour_frequency',
                          choices,
                          value_counts=True)

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    tours = pipeline.extend_table("tours", subtours)

    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_frequency.tours')
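
(Note: simulate.simple_simulate returns positional indexes into the spec's alternative columns, which is why each example converts them back with pd.Series(model_spec.columns[choices.values], index=choices.index). A self-contained illustration with toy data follows; the spec layout here is illustrative only.)

import pandas as pd

# toy spec: expression rows, one column per alternative
model_spec = pd.DataFrame({'no_subtours': [0.0], 'eat': [1.2], 'business1': [0.4]},
                          index=['util_asc'])

# toy positional choices as returned by the simulator (0-based column positions)
choices = pd.Series([1, 0, 2], index=pd.Index([101, 102, 103], name='tour_id'))

# map positions to alternative names, preserving the chooser index
choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
# tour_id 101 -> 'eat', 102 -> 'no_subtours', 103 -> 'business1'
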
Example #5
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings(
        'trip_purpose_and_destination.yaml')

    # for consistency, read sample_table_name setting from trip_destination settings file
    trip_destination_model_settings = config.read_model_settings(
        'trip_destination.yaml')
    sample_table_name = trip_destination_model_settings.get(
        'DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if 'failed' not in trips_df.columns:
            # trip_destination model cleaned up any failed trips
            logger.info("%s - no failed column from prior model run." %
                        trace_label)
            return

        elif not trips_df.failed.any():
            # 'failed' column but no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." %
                        trace_label)
            trips_df.drop(columns='failed', inplace=True)
            pipeline.replace_table("trips", trips_df)
            return

        else:
            logger.info(
                "trip_destination has already been run. Rerunning failed trips"
            )
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
                trips_df.tour_id)]
            logger.info("Rerunning %s failed trips and leg-mates" %
                        trips_df.shape[0])

            # drop any previously saved samples of failed trips
            if want_sample_table and pipeline.is_table(sample_table_name):
                logger.info(
                    "Dropping any previously saved samples of failed trips")
                save_sample_df = pipeline.get_table(sample_table_name)
                save_sample_df.drop(trips_df.index,
                                    level='trip_id',
                                    inplace=True)
                pipeline.replace_table(sample_table_name, save_sample_df)
                del save_sample_df

    # if we estimated trip_destination, there should have been no failed trips
    # if we didn't, but it is enabled, it is probably a configuration error
    # if we just estimated trip_purpose, it isn't clear what they are trying to do, nor how to handle it
    assert not (estimation.manager.begin_estimation('trip_purpose')
                or estimation.manager.begin_estimation('trip_destination'))

    processed_trips = []
    save_samples = []
    i = 0
    TRIP_RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in TRIP_RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df, save_sample_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        # if testing, make sure at least one trip fails
        if config.setting('testing_fail_trip_destination', False) \
                and (i == 1) and not trips_df.failed.any():
            fail_o = trips_df[
                trips_df.trip_num < trips_df.trip_count].origin.max()
            trips_df.failed = (trips_df.origin == fail_o) & \
                              (trips_df.trip_num < trips_df.trip_count)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_samples.append(save_sample_df)
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

        # if max iterations reached, add remaining trips to processed_trips and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_sample_df.drop(trips_df[trips_df.failed].index,
                                    level='trip_id',
                                    inplace=True)
                save_samples.append(save_sample_df)
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to processed_trips
        processed_trips.append(trips_df[~trips_df.failed][TRIP_RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
            trips_df.tour_id)]

        # add samples of the good (processed) trips to save_samples
        if save_sample_df is not None:
            # drop failed trip samples
            save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
            save_samples.append(save_sample_df)

    # - assign result columns to trips
    processed_trips = pd.concat(processed_trips)

    if len(save_samples) > 0:
        save_sample_df = pd.concat(save_samples)
        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))
        pipeline.extend_table(sample_table_name, save_sample_df)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, processed_trips.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, processed_trips)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    # check to make sure we wrote sample file if requested
    if want_sample_table and len(trips_df) > 0:
        assert pipeline.is_table(sample_table_name)
        # since we have saved samples for all successful trips
        # once we discard failed trips, we should have samples for all trips
        save_sample_df = pipeline.get_table(sample_table_name)
        # expect samples only for intermediate trip destinations
        assert \
            len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])
        del save_sample_df

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
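
(Note: the retry loop above depends on flag_failed_trip_leg_mates to fail every trip that shares a leg with a failed trip, so that no half-built leg survives into the results. A hedged sketch of that idea, using the tour_id and outbound columns seen in these examples; the real helper may differ in detail.)

def flag_failed_trip_leg_mates_sketch(trips_df, col='failed'):
    # a leg is all trips of one tour in one direction (outbound or inbound);
    # if any trip on a leg failed, mark every trip on that leg as failed
    leg_failed = trips_df.groupby(['tour_id', 'outbound'])[col].transform('any')
    trips_df[col] = leg_failed
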
Example #6
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to the tours table.

    Creates a trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()

    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logger.info("%s running segment %s with %s chooser rows" %
                     (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if preprocessor_settings:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
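
(Note: the '2out_0in' naming convention in the docstring maps directly to trip counts: each direction gets stops + 1 trips. A small sketch of that arithmetic; the parsing is illustrative only, since process_trips handles the expansion internally.)

import re

def trips_per_direction(stop_frequency_alt):
    # '2out_0in' -> 2 outbound stops, 0 inbound stops
    m = re.match(r'(\d+)out_(\d+)in', stop_frequency_alt)
    out_stops, in_stops = int(m.group(1)), int(m.group(2))
    return out_stops + 1, in_stops + 1  # trips = stops + 1 in each direction

assert trips_per_direction('2out_0in') == (3, 1)  # four trips total
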
Example #7
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory tours (see the
    alternatives above) - these tours include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings(
        'mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(
        file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'),
        set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons",
                len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index).reindex(
                            persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as for non_mandatory_tours except tour types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers, mandatory_tour_frequency_alts=alternatives)

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(
        persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
Example #8
def atwork_subtour_destination(
        tours,
        persons_merged,
        network_los,
        chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_destination'
    model_settings_file_name = 'atwork_subtour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    future_settings = {
        'SIZE_TERM_SELECTOR': 'atwork',
        'SEGMENTS': ['atwork'],
        'ORIG_ZONE_ID': 'workplace_zone_id'
    }
    model_settings = config.future_model_settings(model_settings_file_name, model_settings, future_settings)

    destination_column_name = 'destination'
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    estimator = estimation.manager.begin_estimation('atwork_subtour_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        subtours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df['choice'])
        choices_df['choice'] = estimator.get_survey_values(choices_df['choice'], 'tours', 'destination')
        estimator.write_override_choices(choices_df['choice'])
        estimator.end_estimation()

    subtours[destination_column_name] = choices_df['choice']
    assign_in_place(tours, subtours[[destination_column_name]])

    if want_logsums:
        subtours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, subtours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    tracing.print_summary(destination_column_name,
                          subtours[destination_column_name],
                          describe=True)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
Example #9
def stop_frequency(tours, tours_merged, stop_frequency_alts, skim_dict,
                   chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to the tours table.

    Creates a trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()

    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {"od_skims": od_skim_stack_wrapper}
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose,
                          value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logger.info("%s running segment %s with %s chooser rows" %
                     (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' %
                                        segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if preprocessor_settings:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
Example #10
def iterate_location_choice(model_settings, persons_merged, persons,
                            households, network_los, estimator, chunk_size,
                            trace_hh_id, locutor, trace_label):
    """
    iterate run_location_choice updating shadow pricing until convergence criteria satisfied
    or max_iterations reached.

    (If use_shadow_pricing is not enabled, just iterate once.)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    households : injected table
    network_los : los.Network_LOS
    estimator : Estimator object or None
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
    adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME'] - if provided
    adds annotations to persons table
    """

    chunk_tag = trace_label

    # boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged_df = persons_merged.to_frame()

    persons_merged_df = persons_merged_df[
        persons_merged_df[chooser_filter_column]]

    # interaction_sample expects chooser index to be monotonic increasing
    persons_merged_df.sort_index(inplace=True)

    # chooser segmentation allows different sets of coefficients for e.g. different income_segments or tour_types
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    assert chooser_segment_column in persons_merged_df, \
        f"CHOOSER_SEGMENT_COLUMN_NAME '{chooser_segment_column}' not in persons_merged table."

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations
    assert not (spc.use_shadow_pricing and estimator)

    logger.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices_df, save_sample_df = run_location_choice(
            persons_merged_df,
            network_los,
            shadow_price_calculator=spc,
            want_logsums=logsum_column_name is not None,
            want_sample_table=want_sample_table,
            estimator=estimator,
            model_settings=model_settings,
            chunk_size=chunk_size,
            chunk_tag=chunk_tag,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label,
                                                   'i%s' % iteration))

        # choices_df is a pandas DataFrame with columns 'choice' and (optionally) 'logsum'
        if choices_df is None:
            break

        spc.set_choices(
            choices=choices_df['choice'],
            segment_ids=persons_merged_df[chooser_segment_column].reindex(
                choices_df.index))

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logger.info("%s converged after iteration %s" % (
                trace_label,
                iteration,
            ))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'],
                             spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'],
                             spc.modeled_size)

    persons_df = persons.to_frame()

    # add the choice values to the dest_choice_column in persons dataframe
    # We only chose school locations for the subset of persons who go to school,
    # so we backfill the empty choices with -1 to code as no school location
    NO_DEST_ZONE = -1
    persons_df[dest_choice_column_name] = \
        choices_df['choice'].reindex(persons_df.index).fillna(NO_DEST_ZONE).astype(int)

    # add the dest_choice_logsum column to persons dataframe
    if logsum_column_name:
        persons_df[logsum_column_name] = \
            choices_df['logsum'].reindex(persons_df.index).astype('float')

    if save_sample_df is not None:
        # might be None for tiny samples even if sample_table_name was specified
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(
            choices_df)
        # lest they try to put school and workplace samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("dest choice sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label,
                                                   'annotate_persons'))

        pipeline.replace_table("persons", persons_df)

        if trace_hh_id:
            tracing.trace_df(persons_df, label=trace_label, warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label,
                                                   'annotate_households'))
        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    if logsum_column_name:
        tracing.print_summary(logsum_column_name,
                              choices_df['logsum'],
                              value_counts=True)

    return persons_df
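
(Note: the loop above updates shadow prices between iterations until spc.check_fit signals convergence. The sketch below shows one common flavor of such an update - a damped log adjustment of prices toward target zone sizes; the actual shadow_pricing calculator is configurable and more elaborate.)

import numpy as np

def update_shadow_prices_sketch(shadow_prices, modeled_size, target_size, damping=1.0):
    # push prices down where zones are over-chosen and up where under-chosen
    ratio = np.where(modeled_size > 0, target_size / modeled_size, 1.0)
    return shadow_prices + damping * np.log(np.maximum(ratio, 1e-9))
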
Example #11
def atwork_subtour_destination(tours, persons_merged, skim_dict, skim_stack,
                               land_use, size_terms, chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_destination'
    model_settings_file_name = 'atwork_subtour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    destination_column_name = 'destination'
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']
    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    estimator = estimation.manager.begin_estimation(
        'atwork_subtour_destination')
    if estimator:
        estimator.write_coefficients(
            simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms',
                              append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse',
                              append=False)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    destination_size_terms = tour_destination_size_terms(
        land_use, size_terms, 'atwork')

    destination_sample_df = atwork_subtour_destination_sample(
        subtours,
        persons_merged,
        model_settings,
        skim_dict,
        destination_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'sample'))

    destination_sample_df = atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample_df,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'logsums'))

    choices_df = atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample_df,
        want_logsums,
        model_settings,
        skim_dict,
        destination_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'simulate'))

    if estimator:
        estimator.write_choices(choices_df['choice'])
        choices_df['choice'] = estimator.get_survey_values(
            choices_df['choice'], 'tours', 'destination')
        estimator.write_override_choices(choices_df['choice'])
        estimator.end_estimation()

    subtours[destination_column_name] = choices_df['choice']
    assign_in_place(tours, subtours[[destination_column_name]])

    if want_logsums:
        subtours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, subtours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        # FIXME - sample_table
        assert len(destination_sample_df.index.unique()) == len(choices_df)
        destination_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'],
                                        append=True,
                                        inplace=True)
        pipeline.extend_table(sample_table_name, destination_sample_df)

    tracing.print_summary(destination_column_name,
                          subtours[destination_column_name],
                          describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
Example #12
def tour_od_choice(
        tours,
        persons,
        households,
        land_use,
        network_los,
        chunk_size,
        trace_hh_id):

    """Simulates joint origin/destination choice for all tours.

    Given a set of previously generated tours, each tour needs to have an
    origin and a destination. In this case tours are the choosers, but
    the associated person that's making the tour does not necessarily have
    a home location assigned already. So we choose a tour origin at the same
    time as we choose a tour destination, and assign the tour origin as that
    person's home location.

    Parameters
    ----------
    tours : orca.DataFrameWrapper
        lazy-loaded tours table
    persons : orca.DataFrameWrapper
        lazy-loaded persons table
    households : orca.DataFrameWrapper
        lazy-loaded households table
    land_use : orca.DataFrameWrapper
        lazy-loaded land use data table
    network_los : orca._InjectableFuncWrapper
        lazy-loaded activitysim.los.Network_LOS object
    chunk_size : int
        simulation chunk size, set in main settings.yaml
    trace_hh_id : int
        households to trace, set in main settings.yaml
    """

    trace_label = 'tour_od_choice'
    model_settings_file_name = 'tour_od_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    origin_col_name = model_settings['ORIG_COL_NAME']
    dest_col_name = model_settings['DEST_COL_NAME']
    alt_id_col = tour_od.get_od_id_col(origin_col_name, dest_col_name)

    sample_table_name = model_settings.get('OD_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    logsum_column_name = model_settings.get('OD_CHOICE_LOGSUM_COLUMN_NAME', None)
    want_logsums = logsum_column_name is not None

    tours = tours.to_frame()

    # interaction_sample_simulate insists choosers appear in same order as alts
    tours = tours.sort_index()

    estimator = estimation.manager.begin_estimation('tour_od_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(alt_id_col)
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_od.run_tour_od(
        tours,
        persons,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        assert estimator.want_unsampled_alternatives
        estimator.write_choices(choices_df.choice)
        survey_od = estimator.get_survey_values(
            choices_df.choice, 'tours', [origin_col_name, dest_col_name])
        choices_df[origin_col_name] = survey_od[origin_col_name]
        choices_df[dest_col_name] = survey_od[dest_col_name]
        survey_od[alt_id_col] = tour_od.create_od_id_col(survey_od, origin_col_name, dest_col_name)
        choices_df.choice = survey_od[alt_id_col]
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    tours[origin_col_name] = choices_df[origin_col_name].reindex(tours.index)
    tours[dest_col_name] = choices_df[dest_col_name].reindex(tours.index)
    if want_logsums:
        tours[logsum_column_name] = \
            choices_df['logsum'].reindex(tours.index).astype('float')
    tours['poe_id'] = tours[origin_col_name].map(land_use.to_frame(columns='poe_id').poe_id)

    households = households.to_frame()
    persons = persons.to_frame()
    households[origin_col_name] = tours.set_index('household_id')[origin_col_name].reindex(households.index)
    persons[origin_col_name] = households[origin_col_name].reindex(persons.household_id).values

    # Downstream steps require that persons and households have a 'home_zone_id'
    # column. We assume that if the tour_od_choice model is used, this field is
    # missing from the population data, so it gets inherited from the tour origin
    households['home_zone_id'] = households[origin_col_name]
    persons['home_zone_id'] = persons[origin_col_name]

    pipeline.replace_table("tours", tours)
    pipeline.replace_table("persons", persons)
    pipeline.replace_table("households", households)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="tours_od_choice",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
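
(Note: because origin and destination are chosen jointly, each alternative is an (origin, destination) pair flattened into a single id column via tour_od.get_od_id_col / create_od_id_col. The sketch below is a hypothetical encoding of such a pair; the real helpers may encode it differently.)

def create_od_id_col_sketch(df, origin_col, dest_col):
    # hypothetical: flatten an (origin, destination) pair into one alternative id
    return df[origin_col].astype(str) + '_' + df[dest_col].astype(str)
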
Example #13
def joint_tour_frequency(households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making fully joint tours (see the
    alternatives above).
    """
    trace_label = 'joint_tour_frequency'
    model_settings = config.read_model_settings('joint_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('joint_tour_frequency_alternatives.csv'),
        set_index='alt')

    # - only interested in households with more than one cdap travel_active person
    households = households.to_frame()
    multi_person_households = households[
        households.num_travel_active > 1].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info(
        "Running joint_tour_frequency with %d multi-person households" %
        multi_person_households.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=multi_person_households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # - simple_simulate

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be the point person for the purpose of generating an index and setting origin
    temp_point_persons = persons.loc[persons.PNUM == 1]
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_taz']]

    joint_tours = \
        process_joint_tours(choices, alternatives, temp_point_persons)

    tours = pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households
    # add joint_tour_frequency and num_hh_joint_tours columns to households
    # reindex since we ran model on a subset of households
    households['joint_tour_frequency'] = choices.reindex(
        households.index).fillna('').astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency',
                          households.joint_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households, label="joint_tour_frequency.households")

        tracing.trace_df(joint_tours,
                         label="joint_tour_frequency.joint_tours",
                         slicer='household_id')
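
(Note: joint_tour_frequency runs only on a subset of households, so its choices must be reindexed onto the full households table with an explicit fill for the rest - the reindex/fillna idiom used above. A toy illustration:)

import pandas as pd

households_idx = pd.Index([1, 2, 3, 4], name='household_id')
# choices exist only for the modeled subset (households 2 and 4)
choices = pd.Series(['1_Shop', '2_Eat'],
                    index=pd.Index([2, 4], name='household_id'))

# reindex onto the full table; non-modeled households get an empty string
joint_tour_frequency = choices.reindex(households_idx).fillna('').astype(str)
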
Example #14
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory tours (see the
    alternatives above) - these tours include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as for non_mandatory_tours except tour types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
Example #15
def joint_tour_destination(tours, persons_merged, households_merged,
                           network_los, chunk_size, trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'joint_tour_destination'
    model_settings_file_name = 'joint_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    # choosers are tours - in a sense tours are choosing their destination
    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    persons_merged = persons_merged.to_frame()

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results('joint_tour_destination')
        return

    estimator = estimation.manager.begin_estimation('joint_tour_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms',
                              append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse',
                              append=False)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        tours, persons_merged, want_logsums, want_sample_table, model_settings,
        network_los, estimator, chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(
            choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    # add column as we want joint_tours table for tracing.
    joint_tours['destination'] = choices_df.choice
    assign_in_place(tours, joint_tours[['destination']])
    pipeline.replace_table("tours", tours)

    if want_logsums:
        joint_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, joint_tours[[logsum_column_name]])

    tracing.print_summary('destination',
                          joint_tours.destination,
                          describe=True)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(
            choices_df)
        # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_destination.joint_tours")
Example #16
def atwork_subtour_frequency(tours, persons_merged,
                             atwork_subtour_frequency_spec,
                             atwork_subtour_frequency_settings,
                             atwork_subtour_frequency_alternatives, chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making at-work subtours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    work_tours = tours[tours.tour_type == 'work']

    # merge persons into work_tours
    work_tours = pd.merge(work_tours,
                          persons_merged,
                          left_on='person_id',
                          right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours" %
                len(work_tours))

    nest_spec = config.get_logit_model_settings(
        atwork_subtour_frequency_settings)
    constants = config.get_model_constants(atwork_subtour_frequency_settings)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=atwork_subtour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(atwork_subtour_frequency_spec.columns[choices.values],
                        index=choices.index)

    tracing.print_summary('atwork_subtour_frequency',
                          choices,
                          value_counts=True)

    # reindex since we are working with a subset of tours
    choices = choices.reindex(tours.index)

    # add atwork_subtour_frequency column to tours
    tours['atwork_subtour_frequency'] = choices
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours,
                                       atwork_subtour_frequency_alternatives)

    pipeline.extend_table("tours", subtours)
    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel(subtours, 'tours')

    if trace_hh_id:
        trace_columns = ['atwork_subtour_frequency']
        tracing.trace_df(inject.get_table('tours').to_frame(),
                         label=trace_label,
                         columns=trace_columns,
                         warn_if_empty=True)
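
The "convert indexes to alternative names" step appears in several of these models: simple_simulate returns positional alternative indexes, and indexing the spec's columns with those positions recovers the alternative names. A small self-contained sketch (names and ids invented):

import pandas as pd

spec_columns = pd.Index(['no_subtours', 'eat', 'business1'])
choices = pd.Series([0, 2, 1], index=pd.Index([10, 11, 12], name='tour_id'))

# positional indexes -> alternative names, keyed by the original chooser index
named_choices = pd.Series(spec_columns[choices.values], index=choices.index)
print(named_choices)  # 10 -> no_subtours, 11 -> business1, 12 -> eat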
Example #17
def joint_tour_frequency(households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making fully joint tours (see the
    alternatives above).
    """
    trace_label = 'joint_tour_frequency'
    model_settings_file_name = 'joint_tour_frequency.yaml'

    estimator = estimation.manager.begin_estimation('joint_tour_frequency')

    model_settings = config.read_model_settings(model_settings_file_name)

    alternatives = simulate.read_model_alts(
        'joint_tour_frequency_alternatives.csv', set_index='alt')

    # we are only interested in households with more than one cdap travel_active
    # person and at least one non-preschooler
    households = households.to_frame()
    multi_person_households = households[
        households.participates_in_jtf_model].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info(
        "Running joint_tour_frequency with %d multi-person households" %
        multi_person_households.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=multi_person_households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(multi_person_households)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households',
                                              'joint_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be point person for the purpose of generating an index and setting origin
    temp_point_persons = persons.loc[persons.PNUM == 1].copy()
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_zone_id']]

    joint_tours = \
        process_joint_tours(choices, alternatives, temp_point_persons)

    tours = pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    # (idxmax returns the label of the first all-zero alternative)
    no_tours_alt = (alternatives.sum(axis=1) == 0).idxmax()
    households['joint_tour_frequency'] = choices.reindex(
        households.index).fillna(no_tours_alt).astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency',
                          households.joint_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households, label="joint_tour_frequency.households")

        tracing.trace_df(joint_tours,
                         label="joint_tour_frequency.joint_tours",
                         slicer='household_id')

    if estimator:
        survey_tours = estimation.manager.get_survey_table('tours')
        survey_tours = survey_tours[survey_tours.tour_category == 'joint']

        print(f"len(survey_tours) {len(survey_tours)}")
        print(f"len(joint_tours) {len(joint_tours)}")

        different = False
        survey_tours_not_in_tours = survey_tours[~survey_tours.index.
                                                 isin(joint_tours.index)]
        if len(survey_tours_not_in_tours) > 0:
            print(f"survey_tours_not_in_tours\n{survey_tours_not_in_tours}")
            different = True
        tours_not_in_survey_tours = joint_tours[~joint_tours.index.
                                                isin(survey_tours.index)]
        if len(tours_not_in_survey_tours) > 0:
            print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}")
            different = True
        assert not different
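
The no-tours backfill used above is worth isolating: households that were not modeled (non-travelers) are filled with the first all-zero alternative. A toy sketch of the idiom, with invented alternatives and household ids:

import pandas as pd

alternatives = pd.DataFrame(
    {'shopping': [0, 1], 'eatout': [0, 1]},
    index=pd.Index(['0_tours', '1_shop_1_eat'], name='alt'))

# label of the first alternative whose tour counts are all zero
no_tours_alt = (alternatives.sum(axis=1) == 0).idxmax()  # '0_tours'

choices = pd.Series(['1_shop_1_eat'], index=pd.Index([2], name='household_id'))
all_households = pd.Index([1, 2, 3], name='household_id')
print(choices.reindex(all_households).fillna(no_tours_alt))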
Example #18
def parking_location(trips, trips_merged, land_use, network_los, chunk_size,
                     trace_hh_id):
    """
    Given a set of trips, each trip needs to have a parking location if
    it is eligible for remote parking.
    """

    trace_label = 'parking_location'
    model_settings = config.read_model_settings('parking_location_choice.yaml')
    alt_destination_col_name = model_settings['ALT_DEST_COL_NAME']

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    trips_df = trips.to_frame()
    trips_merged_df = trips_merged.to_frame()
    land_use_df = land_use.to_frame()

    locals_dict = {'network_los': network_los}
    locals_dict.update(config.get_model_constants(model_settings))

    if preprocessor_settings:
        expressions.assign_columns(df=trips_merged_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    parking_locations, save_sample_df = run_parking_destination(
        model_settings,
        trips_merged_df,
        land_use_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
    )

    assign_in_place(trips_df,
                    parking_locations.to_frame(alt_destination_col_name))

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get(
            'PARKING_LOCATION_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
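
The sample-table sanity check above counts distinct chooser ids on level 0 of the sample MultiIndex. A minimal sketch, assuming invented trip ids and parking zones:

import pandas as pd

idx = pd.MultiIndex.from_tuples(
    [(101, 5), (101, 7), (102, 5)], names=['trip_id', 'parking_zone'])
save_sample_df = pd.DataFrame({'prob': [0.4, 0.6, 1.0]}, index=idx)

# two distinct trips sampled alternatives in this toy table
n_choosers = len(save_sample_df.index.get_level_values(0).unique())
assert n_choosers == 2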
Example #19
def trip_destination(trips, tours_merged, chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips.)


    """
    trace_label = 'trip_destination'

    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get(
        'fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    estimator = estimation.manager.begin_estimation('trip_destination')

    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms',
                              append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse',
                              append=False)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination',
                      False) and not trips_df.failed.any():
        if (trips_df.trip_num < trips_df.trip_count).sum() == 0:
            raise RuntimeError(
                f"can't honor 'testing_fail_trip_destination' setting because no intermediate trips"
            )

        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label,
                       trips_df.failed.sum())
        if inject.get_injectable('pipeline_file_prefix', None):
            file_name = f"{trace_label}_failed_trips_{inject.get_injectable('pipeline_file_prefix')}"
        else:
            file_name = f"{trace_label}_failed_trips"
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

    if estimator:
        estimator.end_estimation()
        # no trips should have failed since we overwrite choices and sample should have not failed trips
        assert not trips_df.failed.any()

    if CLEANUP:

        if trips_df.failed.any():
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed],
                                    level='trip_id',
                                    inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be none if want_sample_table but there are no intermediate trips
        # expect samples only for intermediate trip destinations

        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
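
The forced-failure testing hook above flags all intermediate trips leaving one chosen origin. A toy reproduction of that selection logic (values invented):

import pandas as pd

trips_df = pd.DataFrame({
    'origin': [3, 7, 7, 2],
    'trip_num': [1, 1, 2, 1],
    'trip_count': [2, 3, 3, 1],
})

# intermediate trips are those that are not the last trip on their leg
intermediate = trips_df.trip_num < trips_df.trip_count
fail_o = trips_df[intermediate].origin.max()  # 7 in this toy frame
trips_df['failed'] = (trips_df.origin == fail_o) & intermediate
print(trips_df)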
Example #20
def stop_frequency(tours, tours_merged, stop_frequency_alts, network_los,
                   chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to the tours table.

    creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings_file_name = 'stop_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()
    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        assert 'origin' in tours_merged
        assert 'destination' in tours_merged
        od_skim_stack_wrapper = network_los.get_default_skim_dict().wrap(
            'origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            'network_los': network_los
        }
        locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose,
                          value_counts=True)

    spec_segments = model_settings.get('SPEC_SEGMENTS')
    assert spec_segments is not None, f"SPEC_SEGMENTS setting not found in model settings: {model_settings_file_name}"
    segment_col = model_settings.get('SEGMENT_COL')
    assert segment_col is not None, f"SEGMENT_COL setting not found in model settings: {model_settings_file_name}"

    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_value = segment_settings[segment_col]

        chooser_segment = tours_merged[tours_merged[segment_col] ==
                                       segment_value]

        if len(chooser_segment) == 0:
            logger.info(
                f"{trace_label} skipping empty segment {segment_name}")
            continue

        logger.info(
            f"{trace_label} running segment {segment_name} with {chooser_segment.shape[0]} chooser rows"
        )

        estimator = estimation.manager.begin_estimation(
            model_name=segment_name, bundle_name='stop_frequency')

        segment_spec = simulate.read_model_spec(
            file_name=segment_settings['SPEC'])
        assert segment_spec is not None, "spec for segment_type %s not found" % segment_name

        coefficients_file_name = segment_settings['COEFFICIENTS']
        coefficients_df = simulate.read_model_coefficients(
            file_name=coefficients_file_name)
        segment_spec = simulate.eval_coefficients(segment_spec,
                                                  coefficients_df, estimator)

        if estimator:
            estimator.write_spec(segment_settings, bundle_directory=False)
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name,
                                           bundle_directory=True)
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = simulate.simple_simulate(
            choosers=chooser_segment,
            spec=segment_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_name),
            trace_choice_name='stops',
            estimator=estimator)

        # convert indexes to alternative names
        choices = pd.Series(segment_spec.columns[choices.values],
                            index=choices.index)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'tours', 'stop_frequency')  # override choices
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    # FIXME should have added this when tours created?
    if 'primary_purpose' not in tours.columns:
        # not already there, so it was not added by the annotate tours preprocessor
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if estimator:
        # make sure they created trips with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_id', 'outbound']

        survey_trips = estimation.manager.get_survey_table(table_name='trips')
        different = False
        survey_trips_not_in_trips = survey_trips[~survey_trips.index.
                                                 isin(trips.index)]
        if len(survey_trips_not_in_trips) > 0:
            print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}")
            different = True
        trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index
                                                            )]
        if len(trips_not_in_survey_trips) > 0:
            print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}")
            different = True
        assert not different

        survey_trips = \
            estimation.manager.get_survey_values(trips,
                                                 table_name='trips',
                                                 column_names=columns)

        trips_differ = (trips[columns] != survey_trips[columns]).any(axis=1)

        if trips_differ.any():
            print("trips_differ\n%s" % trips_differ)
            print("%s of %s tours differ" %
                  (trips_differ.sum(), len(trips_differ)))
            print("differing survey_trips\n%s" % survey_trips[trips_differ])
            print("differing modeled_trips\n%s" % trips[columns][trips_differ])

        assert (not trips_differ.any())

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if preprocessor_settings:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
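
Alternative names like '2out_0in' encode the number of intermediate stops per direction; each leg with n stops yields n + 1 trips. A hypothetical helper (not part of ActivitySim's API) that makes the arithmetic explicit:

import re

def trips_for_alt(alt_name):
    # n intermediate stops on a leg imply n + 1 trips on that leg
    out_stops, in_stops = map(
        int, re.match(r'(\d+)out_(\d+)in', alt_name).groups())
    return out_stops + 1, in_stops + 1

assert trips_for_alt('2out_0in') == (3, 1)  # four trips in total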
Example #21
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):

    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings_file_name = 'non_mandatory_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    # (take a copy so assigning choice columns below doesn't warn)
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory'].copy()

    # - if no non_mandatory tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    estimator = estimation.manager.begin_estimation('non_mandatory_tour_destination')
    if estimator:
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        non_mandatory_tours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        skim_dict,
        skim_stack,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    non_mandatory_tours['destination'] = choices_df.choice

    assign_in_place(tours, non_mandatory_tours[['destination']])

    if want_logsums:
        non_mandatory_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, non_mandatory_tours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
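
The two optional-output gates at the top of this model are settings-driven: a configured column name turns logsum reporting on, and sample tables require both a global flag and a table name. A dict-based sketch of the same logic (settings values invented):

# stand-ins for config.read_model_settings / config.setting
model_settings = {'DEST_CHOICE_LOGSUM_COLUMN_NAME': 'destination_logsum'}
global_settings = {'want_dest_choice_sample_tables': False}

logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
want_logsums = logsum_column_name is not None

sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
want_sample_table = bool(
    global_settings['want_dest_choice_sample_tables'] and sample_table_name)

assert want_logsums and not want_sample_table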
Example #22
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips.)


    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get('fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination', False) and not trips_df.failed.any():
        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:

        if trips_df.failed.any():
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed], level='trip_id', inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be none if want_sample_table but there are no intermediate trips
        # expect samples only for intermediate trip destinations

        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
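
In the CLEANUP branch, failed trips are removed from the MultiIndexed sample table by dropping labels on the trip_id level. A small sketch with invented data:

import pandas as pd

idx = pd.MultiIndex.from_tuples(
    [(1, 10), (1, 11), (2, 10)], names=['trip_id', 'alt_dest'])
save_sample_df = pd.DataFrame({'prob': [0.5, 0.5, 1.0]}, index=idx)

# drop every sample row belonging to failed trip 1
save_sample_df = save_sample_df.drop([1], level='trip_id')
assert list(save_sample_df.index.get_level_values('trip_id').unique()) == [2]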
Example #23
def non_mandatory_tour_frequency(persons, persons_merged, chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory tours
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these tours include escort, shopping, othmaint,
    othdiscr, eatout, and social tours in various combinations.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings_file_name = 'non_mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives = simulate.read_model_alts(
        'non_mandatory_tour_frequency_alternatives.csv', set_index=None)
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # filter based on results of CDAP
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {'person_max_window': person_max_window}

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    logger.info("Running non_mandatory_tour_frequency with %d persons",
                len(choosers))

    constants = config.get_model_constants(model_settings)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    spec_segments = model_settings.get('SPEC_SEGMENTS', {})

    # segment by person type and pick the right spec for each person type
    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings['NAME']
        ptype = segment_settings['PTYPE']

        # pick the spec column for the segment
        segment_spec = model_spec[[segment_name]]

        chooser_segment = choosers[choosers.ptype == ptype]

        logger.info("Running segment '%s' of size %d", segment_name,
                    len(chooser_segment))

        if len(chooser_segment) == 0:
            # skip empty segments
            continue

        estimator = \
            estimation.manager.begin_estimation(model_name=segment_name, bundle_name='non_mandatory_tour_frequency')

        coefficients_df = simulate.read_model_coefficients(segment_settings)
        segment_spec = simulate.eval_coefficients(segment_spec,
                                                  coefficients_df, estimator)

        if estimator:
            estimator.write_spec(model_settings, bundle_directory=True)
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name,
                                           bundle_directory=True)
            # preserving coefficients file name makes bringing back updated coefficients more straightforward
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)
            estimator.write_alternatives(alternatives, bundle_directory=True)

            # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
            #  should we do it here or have interaction_simulate do it?
            # chooser index must be duplicated in column or it will be omitted from interaction_dataset
            # estimation requires that chooser_id is either in index or a column of interaction_dataset
            # so it can be reformatted (melted) and indexed by chooser_id and alt_id
            assert chooser_segment.index.name == 'person_id'
            assert 'person_id' not in chooser_segment.columns
            chooser_segment['person_id'] = chooser_segment.index

            # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables?
            estimator.set_alt_id('alt_id')

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = interaction_simulate(
            chooser_segment,
            alternatives,
            spec=segment_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % segment_name,
            trace_choice_name='non_mandatory_tour_frequency',
            estimator=estimator)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'persons', 'non_mandatory_tour_frequency')
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

        # FIXME - force garbage collection?
        force_garbage_collect()

    del alternatives['tot_tours']  # del tot_tours column we added above

    # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    # is the index value of the chosen alternative in the alternatives table.
    choices = pd.concat(choices_list).sort_index()

    # add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    # (idxmax returns the label of the first all-zero alternative)
    no_tours_alt = (alternatives.sum(axis=1) == 0).idxmax()
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)
    """
    We have now generated non-mandatory tour frequencies, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)

    But before we do that, we run an additional probabilistic step to extend/increase tour counts
    beyond the strict limits of the tour_frequency alternatives chosen above (which are currently limited
    to at most 2 escort tours and 1 each of shopping, othmaint, othdiscr, eatout, and social tours)

    The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate is simply the
    index value of the chosen alternative in the alternatives table.

    get counts of each of the tour type alternatives (so we can extend)
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """

    # counts of each of the tour type alternatives (so we can extend)
    modeled_tour_counts = alternatives.loc[choices]
    modeled_tour_counts.index = choices.index  # assign person ids to the index

    # - extend_tour_counts - probabilistic
    extended_tour_counts = \
        extend_tour_counts(choosers, modeled_tour_counts.copy(), alternatives,
                           trace_hh_id, tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    num_modeled_tours = modeled_tour_counts.sum().sum()
    num_extended_tours = extended_tour_counts.sum().sum()
    logger.info("extend_tour_counts increased tour count by %s from %s to %s" %
                (num_extended_tours - num_modeled_tours, num_modeled_tours,
                 num_extended_tours))
    """
    create the non_mandatory tours based on extended_tour_counts
    """
    if estimator:
        override_tour_counts = \
            estimation.manager.get_survey_values(extended_tour_counts,
                                                 table_name='persons',
                                                 column_names=['_%s' % c for c in extended_tour_counts.columns])
        override_tour_counts = \
            override_tour_counts.rename(columns={('_%s' % c): c for c in extended_tour_counts.columns})
        logger.info(
            "estimation get_survey_values override_tour_counts %s changed cells"
            % (override_tour_counts != extended_tour_counts).sum().sum())
        extended_tour_counts = override_tour_counts
    """
    create the non_mandatory tours based on extended_tour_counts
    """
    non_mandatory_tours = process_non_mandatory_tours(persons,
                                                      extended_tour_counts)
    assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()

    if estimator:

        # make sure they created the right tours
        survey_tours = estimation.manager.get_survey_table(
            'tours').sort_index()
        non_mandatory_survey_tours = survey_tours[survey_tours.tour_category ==
                                                  'non_mandatory']
        assert len(non_mandatory_survey_tours) == len(non_mandatory_tours)
        assert non_mandatory_survey_tours.index.equals(
            non_mandatory_tours.sort_index().index)

        # make sure they created tours with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_type', 'tour_category']
        survey_tours = \
            estimation.manager.get_survey_values(non_mandatory_tours,
                                                 table_name='tours',
                                                 column_names=columns)

        tours_differ = (non_mandatory_tours[columns] !=
                        survey_tours[columns]).any(axis=1)

        if tours_differ.any():
            print("tours_differ\n%s" % tours_differ)
            print("%s of %s tours differ" %
                  (tours_differ.sum(), len(tours_differ)))
            print("differing survey_tours\n%s" % survey_tours[tours_differ])
            print("differing modeled_tours\n%s" %
                  non_mandatory_tours[columns][tours_differ])

        assert (not tours_differ.any())

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(
            non_mandatory_tours,
            label="non_mandatory_tour_frequency.non_mandatory_tours",
            warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(
            persons,
            label="non_mandatory_tour_frequency.annotated_persons",
            warn_if_empty=True)
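
The counts step above turns frequency choices (alternative row labels) into a per-person tour-count frame via alternatives.loc[choices]. A compact sketch mirroring the docstring example (data invented):

import pandas as pd

alternatives = pd.DataFrame(
    {'escort': [0, 2], 'shopping': [0, 1]},
    index=pd.Index([0, 1], name='alt'))

choices = pd.Series([1, 0, 1], index=pd.Index([7, 8, 9], name='person_id'))

# look up the chosen alternative row for each person
tour_counts = alternatives.loc[choices]
tour_counts.index = choices.index  # re-key rows by person id
print(tour_counts)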
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory tours
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these tours include escort, shopping, othmaint,
    othdiscr, eatout, and social tours in various combinations.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings = config.read_model_settings('non_mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='non_mandatory_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('non_mandatory_tour_frequency_alternatives.csv'),
        set_index=None)

    choosers = persons_merged.to_frame()

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for ptype, segment in choosers.groupby('ptype'):

        name = PTYPE_NAME[ptype]

        # pick the spec column for the segment
        spec = model_spec[[name]]

        # drop any zero-valued rows
        spec = spec[spec[name] != 0]

        logger.info("Running segment '%s' of size %d", name, len(segment))

        choices = interaction_simulate(
            segment,
            alternatives,
            spec=spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        # FIXME - force garbage collection?
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    del alternatives['tot_tours']  # del tot_tours column we added above

    # - add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    # (we expect there to be an alt with no tours - which we can use to backfill non-travelers)
    no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tours, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)
    """

    # - get counts of each of the alternatives (so we can extend)
    # (choices is just the index values for the chosen alts)
    """
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """
    tour_counts = alternatives.loc[choices]
    tour_counts.index = choices.index  # assign person ids to the index

    prev_tour_count = tour_counts.sum().sum()

    # - extend_tour_counts
    tour_counts = extend_tour_counts(choosers, tour_counts, alternatives,
                                     trace_hh_id,
                                     tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    extended_tour_count = tour_counts.sum().sum()

    logger.info("extend_tour_counts increased nmtf tour count by %s from %s to %s" %
                (extended_tour_count - prev_tour_count, prev_tour_count, extended_tour_count))

    # - create the non_mandatory tours
    non_mandatory_tours = process_non_mandatory_tours(persons, tour_counts)
    assert len(non_mandatory_tours) == extended_tour_count

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
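
This second variant segments choosers by person type and scores each segment with only its spec column, dropping zero-valued rows for speed. A runnable sketch, with an invented PTYPE_NAME mapping and toy spec:

import pandas as pd

PTYPE_NAME = {1: 'full', 2: 'part'}  # invented mapping for illustration
model_spec = pd.DataFrame(
    {'full': [1.0, 0.0], 'part': [0.5, 0.2]},
    index=['expr_a', 'expr_b'])
choosers = pd.DataFrame({'ptype': [1, 2, 1]},
                        index=pd.Index([1, 2, 3], name='person_id'))

for ptype, segment in choosers.groupby('ptype'):
    name = PTYPE_NAME[ptype]
    spec = model_spec[[name]]
    spec = spec[spec[name] != 0]  # drop zero-valued rows for speed
    print(name, len(segment), len(spec))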