Example #1
def create_mandatory_tours(trace_hh_id):

    # FIXME - move this to body?

    persons = inject.get_table('persons')
    configs_dir = inject.get_injectable('configs_dir')

    persons = persons.to_frame(columns=["mandatory_tour_frequency",
                                        "is_worker", "school_taz", "workplace_taz"])
    persons = persons[~persons.mandatory_tour_frequency.isnull()]

    tour_frequency_alternatives = inject.get_injectable('mandatory_tour_frequency_alternatives')

    mandatory_tours = process_mandatory_tours(persons, tour_frequency_alternatives)

    expressions.assign_columns(
        df=mandatory_tours,
        model_settings='annotate_tours_with_dest',
        configs_dir=configs_dir,
        trace_label='create_mandatory_tours')

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', tours)
    pipeline.get_rn_generator().add_channel(mandatory_tours, 'tours')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)
Example #2
def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """
    trace_label = 'auto_ownership_simulate'
    model_settings_file_name = 'auto_ownership.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    estimator = estimation.manager.begin_estimation('auto_ownership')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choosers = households_merged.to_frame()

    logger.info("Running %s with %d households", trace_label, len(choosers))

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership',
        estimator=estimator)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households', 'auto_ownership')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label='auto_ownership',
                         warn_if_empty=True)
Example #3
def trip_purpose(
        trips,
        chunk_size,
        trace_hh_id):

    """
    trip purpose model step - calls run_trip_purpose to run the actual model

    adds purpose column to trips
    """
    trace_label = "trip_purpose"

    trips_df = trips.to_frame()

    choices = run_trip_purpose(
        trips_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label
    )

    trips_df['purpose'] = choices

    # we should have assigned a purpose to all trips
    assert not trips_df.purpose.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example #4
def trace_df(self, df, trace_label, extension):
    assert len(df) > 0
    tracing.trace_df(df,
                     label=tracing.extend_trace_label(
                         trace_label, extension),
                     slicer='NONE',
                     transpose=False)
Example #5
def individual_utilities(
        persons,
        cdap_indiv_spec,
        locals_d,
        trace_hh_id=None, trace_label=None):
    """
    Calculate CDAP utilities for all individuals.

    Parameters
    ----------
    persons : pandas.DataFrame
        DataFrame of individual persons data.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec applied to individuals.

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `persons` and columns for each of the alternatives,
        plus some 'useful columns' [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]

    """

    # calculate single person utilities
    indiv_utils = simulate.eval_utilities(cdap_indiv_spec, persons, locals_d, trace_label=trace_label)

    # add columns from persons to facilitate building household interactions
    useful_columns = [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    indiv_utils[useful_columns] = persons[useful_columns]

    if trace_hh_id:
        tracing.trace_df(indiv_utils, '%s.indiv_utils' % trace_label,
                         column_labels=['activity', 'person'])

    return indiv_utils
Example #6
def individual_utilities(
        persons,
        cdap_indiv_spec,
        locals_d,
        trace_hh_id=None, trace_label=None):
    """
    Calculate CDAP utilities for all individuals.

    Parameters
    ----------
    persons : pandas.DataFrame
        DataFrame of individual persons data.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec applied to individuals.

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `persons` and columns for each of the alternatives,
        plus some 'useful columns' [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]

    """

    # calculate single person utilities
    indiv_utils = simulate.eval_utilities(cdap_indiv_spec, persons, locals_d, trace_label)

    # add columns from persons to facilitate building household interactions
    useful_columns = [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    indiv_utils[useful_columns] = persons[useful_columns]

    if trace_hh_id:
        tracing.trace_df(indiv_utils, '%s.indiv_utils' % trace_label,
                         column_labels=['activity', 'person'])

    return indiv_utils
Example #7
def auto_ownership_simulate(households_merged, auto_ownership_spec,
                            auto_ownership_settings, trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """

    logger.info("Running auto_ownership_simulate with %d households" %
                len(households_merged))

    nest_spec = config.get_logit_model_settings(auto_ownership_settings)
    constants = config.get_model_constants(auto_ownership_settings)

    choices = asim.simple_simulate(choosers=households_merged.to_frame(),
                                   spec=auto_ownership_spec,
                                   nest_spec=nest_spec,
                                   locals_d=constants,
                                   trace_label=trace_hh_id
                                   and 'auto_ownership',
                                   trace_choice_name='auto_ownership')

    tracing.print_summary('auto_ownership', choices, value_counts=True)

    orca.add_column('households', 'auto_ownership', choices)

    pipeline.add_dependent_columns('households', 'households_autoown')

    if trace_hh_id:
        trace_columns = ['auto_ownership'
                         ] + orca.get_table('households_autoown').columns
        tracing.trace_df(orca.get_table('households').to_frame(),
                         label='auto_ownership',
                         columns=trace_columns,
                         warn_if_empty=True)
Example #8
def free_parking(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """

    """

    trace_label = 'free_parking'
    model_settings = config.read_model_settings('free_parking.yaml')

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_taz > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name='free_parking.csv')
    nest_spec = config.get_logit_model_settings(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work')

    persons = persons.to_frame()

    # reindex to the full persons index: non-workers were not choosers
    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)
    persons['free_parking_at_work'] = choices.reindex(persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking', persons.free_parking_at_work, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
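
A note on the reindex idiom above: only workers (workplace_taz > -1) are choosers, so choices is shorter than the persons table. Reindexing against the full persons index leaves NaN for non-choosers, and fillna(0).astype(bool) turns those into False. A minimal sketch with toy data (not the model's actual tables):

import pandas as pd

persons_idx = pd.Index([1, 2, 3, 4, 5], name='person_id')
# choices exist only for the chooser subset (persons 1, 3 and 5)
choices = pd.Series([True, False, True], index=[1, 3, 5])

flags = choices.reindex(persons_idx)   # NaN for persons 2 and 4
flags = flags.fillna(0).astype(bool)   # non-choosers become False
print(flags.tolist())                  # [True, False, False, False, True]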
Example #9
def create_simple_trips(tours, households, persons, trace_hh_id):
    """
    Create a simple trip table
    """

    logger.info("Running simple trips table creation with %d tours" %
                len(tours.index))

    tours_df = tours.to_frame()

    # we now have a tour_id column
    tours_df.reset_index(inplace=True)

    tours_df['household_id'] = reindex(persons.household_id,
                                       tours_df.person_id)
    tours_df['TAZ'] = reindex(households.TAZ, tours_df.household_id)

    # create inbound and outbound records
    trips = pd.concat([tours_df, tours_df], ignore_index=True)

    # first half are outbound, second half are inbound
    trips['INBOUND'] = np.repeat([False, True], len(trips.index) // 2)

    # TRIPID for outbound trips = 1, inbound_trips = 2
    trips['trip_num'] = np.repeat([1, 2], len(trips.index) // 2)

    # set key fields from tour fields: 'TAZ', 'destination', 'start', 'end'
    # (use .loc rather than chained assignment, which pandas may silently ignore)
    trips['OTAZ'] = trips.TAZ
    trips.loc[trips.INBOUND, 'OTAZ'] = trips.destination[trips.INBOUND]

    trips['DTAZ'] = trips.destination
    trips.loc[trips.INBOUND, 'DTAZ'] = trips.TAZ[trips.INBOUND]

    trips['start_trip'] = trips.start
    trips.loc[trips.INBOUND, 'start_trip'] = trips.end[trips.INBOUND]

    trips['end_trip'] = trips.end
    trips.loc[trips.INBOUND, 'end_trip'] = trips.start[trips.INBOUND]

    # create a stable (predictable) index based on tour_id and trip_num
    possible_trips_count = 2
    trips['trip_id'] = (trips.tour_id *
                        possible_trips_count) + (trips.trip_num - 1)
    trips.set_index('trip_id', inplace=True, verify_integrity=True)

    trip_columns = [
        'tour_id', 'INBOUND', 'trip_num', 'OTAZ', 'DTAZ', 'start_trip',
        'end_trip'
    ]
    trips = trips[trip_columns]

    orca.add_table("trips", trips)

    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel(trips, 'trips')

    if trace_hh_id:
        tracing.trace_df(trips, label="trips", warn_if_empty=True)
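
The stable index built above is simple arithmetic: with possible_trips_count = 2, trip_id = tour_id * 2 + (trip_num - 1), so every (tour_id, trip_num) pair maps to a distinct integer and set_index(..., verify_integrity=True) cannot fail. A quick standalone check:

import itertools

possible_trips_count = 2
ids = [tour_id * possible_trips_count + (trip_num - 1)
       for tour_id, trip_num in itertools.product([1, 2, 3], [1, 2])]
print(ids)                        # [2, 3, 4, 5, 6, 7]
assert len(ids) == len(set(ids))  # one unique trip_id per (tour_id, trip_num)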
Example #10
def free_parking(persons_merged, persons, households, skim_dict, skim_stack,
                 chunk_size, trace_hh_id, locutor):
    """

    """

    trace_label = 'free_parking'
    model_settings = config.read_model_settings('free_parking.yaml')

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_taz > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name='free_parking.csv')
    nest_spec = config.get_logit_model_settings(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work')

    persons = persons.to_frame()

    # reindex to the full persons index: non-workers were not choosers
    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)
    persons['free_parking_at_work'] = choices.reindex(
        persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking',
                          persons.free_parking_at_work,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
Example #11
def atwork_subtour_destination(
        tours,
        persons_merged,
        skim_dict,
        skim_stack,
        land_use, size_terms,
        chunk_size, trace_hh_id):

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    destination_size_terms = tour_destination_size_terms(land_use, size_terms, 'atwork')

    destination_sample = atwork_subtour_destination_sample(
        subtours,
        persons_merged,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id)

    destination_sample = atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id)

    choices = atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id)

    subtours['destination'] = choices

    assign_in_place(tours, subtours[['destination']])

    pipeline.replace_table("tours", tours)

    tracing.print_summary('subtour destination', subtours.destination, describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
Example #12
def mandatory_scheduling(mandatory_tours_merged, tdd_alts, tdd_school_spec,
                         tdd_work_spec, mandatory_scheduling_settings,
                         chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    mandatory tours
    """

    tours = mandatory_tours_merged.to_frame()
    alts = tdd_alts.to_frame()

    constants = config.get_model_constants(mandatory_scheduling_settings)

    school_spec = tdd_school_spec.to_frame()
    school_tours = tours[tours.tour_type == "school"]

    logger.info("Running mandatory_scheduling school_tours with %d tours" %
                len(school_tours))

    school_choices = vectorize_tour_scheduling(
        school_tours,
        alts,
        school_spec,
        constants=constants,
        chunk_size=chunk_size,
        trace_label='mandatory_scheduling.school')

    work_spec = tdd_work_spec.to_frame()
    work_tours = tours[tours.tour_type == "work"]

    logger.info("Running %d work tour scheduling choices" % len(work_tours))

    work_choices = vectorize_tour_scheduling(
        work_tours,
        alts,
        work_spec,
        constants=constants,
        chunk_size=chunk_size,
        trace_label='mandatory_scheduling.work')

    choices = pd.concat([school_choices, work_choices])

    tracing.print_summary('mandatory_scheduling tour_departure_and_duration',
                          choices,
                          describe=True)

    orca.add_column("mandatory_tours", "tour_departure_and_duration", choices)

    if trace_hh_id:
        tracing.trace_df(orca.get_table('mandatory_tours').to_frame(),
                         label="mandatory_tours",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
Example #13
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """

    model_name = 'mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting tour processing to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    choices = run_tour_scheduling(model_name, mandatory_tours, persons_merged,
                                  tdd_alts, tour_segment_col, chunk_size,
                                  trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
Example #14
def initialize_tours(network_los, households, persons, trace_hh_id):

    trace_label = 'initialize_tours'

    tours = read_input_table("tours")

    # FIXME can't use households_sliced injectable as flag like persons table does in case of resume_after.
    # FIXME could just always slice...
    slice_happened = inject.get_injectable('households_sample_size', 0) > 0
    if slice_happened:
        logger.info("slicing tours %s" % (tours.shape,))
        # keep all persons in the sampled households
        tours = tours[tours.person_id.isin(persons.index)]

    # annotate before patching tour_id to allow addition of REQUIRED_TOUR_COLUMNS defined above
    model_settings = config.read_model_settings('initialize_tours.yaml', mandatory=True)
    expressions.assign_columns(
        df=tours,
        model_settings=model_settings.get('annotate_tours'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_tours'))

    skip_patch_tour_ids = model_settings.get('skip_patch_tour_ids', False)
    if not skip_patch_tour_ids:
        tours = patch_tour_ids(tours)
    assert tours.index.name == 'tour_id'

    # replace table function with dataframe
    inject.add_table('tours', tours)

    pipeline.get_rn_generator().add_channel('tours', tours)

    tracing.register_traceable_table('tours', tours)

    logger.debug(f"{len(tours.household_id.unique())} unique household_ids in tours")
    logger.debug(f"{len(households.index.unique())} unique household_ids in households")
    assert not tours.index.duplicated().any()

    tours_without_persons = ~tours.person_id.isin(persons.index)
    if tours_without_persons.any():
        logger.error(f"{tours_without_persons.sum()} tours out of {len(persons)} without persons\n"
                     f"{pd.Series({'person_id': tours_without_persons.index.values})}")
        raise RuntimeError(f"{tours_without_persons.sum()} tours with bad person_id")

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='initialize_tours',
                         warn_if_empty=True)
Example #15
def cdap_simulate(persons_merged, cdap_settings, cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions, chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    persons_df = persons_merged.to_frame()

    constants = config.get_model_constants(cdap_settings)

    logger.info("Running cdap_simulate with %d persons" %
                len(persons_df.index))

    choices = run_cdap(
        persons=persons_df,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label='cdap')

    tracing.print_summary('cdap_activity',
                          choices.cdap_activity,
                          value_counts=True)

    print(pd.crosstab(persons_df.ptype, choices.cdap_activity, margins=True))

    choices = choices.reindex(persons_merged.index)
    orca.add_column("persons", "cdap_activity", choices.cdap_activity)
    orca.add_column("persons", "cdap_rank", choices.cdap_rank)

    pipeline.add_dependent_columns("persons", "persons_cdap")
    pipeline.add_dependent_columns("households", "households_cdap")

    if trace_hh_id:

        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
Example #16
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):

    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings('non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    choices = tour_destination.run_tour_destination(
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size, trace_hh_id, trace_label)

    non_mandatory_tours['destination'] = choices

    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
Example #17
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips.)


    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id,
        trace_label)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:
        trips_df = cleanup_failed_trips(trips_df)
    elif trips_df.failed.any():
        logger.warning("%s keeping %s sidelined failed trips" %
                       (trace_label, trips_df.failed.sum()))

    pipeline.replace_table("trips", trips_df)

    print("trips_df\n", trips_df.shape)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example #18
def mandatory_tour_frequency(persons_merged,
                             mandatory_tour_frequency_spec,
                             mandatory_tour_frequency_settings,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """

    trace_label = 'mandatory_tour_frequency'

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    choices = simulate.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices, value_counts=True)

    inject.add_column("persons", "mandatory_tour_frequency", choices)

    create_mandatory_tours(trace_hh_id)

    # add mandatory_tour-dependent columns (e.g. tour counts) to persons
    pipeline.add_dependent_columns("persons", "persons_mtf")

    if trace_hh_id:
        trace_columns = ['mandatory_tour_frequency']
        tracing.trace_df(inject.get_table('persons').to_frame(),
                         label="mandatory_tour_frequency.persons",
                         # columns=trace_columns,
                         warn_if_empty=True)
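
The 'convert indexes to alternative names' step above relies on simple_simulate returning the positional index of the winning spec column, so indexing spec.columns with those positions recovers the alternative labels. A toy illustration (hypothetical alternative names, not the real spec file):

import pandas as pd

spec_columns = pd.Index(['work1', 'work2', 'school1', 'work_and_school'])
positions = pd.Series([0, 3, 2], index=[101, 102, 103])  # indexed by person_id

labels = pd.Series(spec_columns[positions.values], index=positions.index)
print(labels.to_dict())  # {101: 'work1', 102: 'work_and_school', 103: 'school1'}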
Example #19
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts,
                                  tdd_non_mandatory_spec,
                                  non_mandatory_tour_scheduling_settings,
                                  chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    non_mandatory_tours = tours[tours.non_mandatory]

    logger.info("Running non_mandatory_tour_scheduling with %d tours" %
                len(tours))

    constants = config.get_model_constants(
        non_mandatory_tour_scheduling_settings)

    tdd_choices = vectorize_tour_scheduling(non_mandatory_tours,
                                            persons_merged,
                                            tdd_alts,
                                            tdd_non_mandatory_spec,
                                            constants=constants,
                                            chunk_size=chunk_size,
                                            trace_label=trace_label)

    # add tdd_choices columns to tours
    for c in tdd_choices.columns:
        tours.loc[tdd_choices.index, c] = tdd_choices[c]

    pipeline.replace_table("tours", tours)

    non_mandatory_tours = tours[tours.non_mandatory]

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Example #20
def persons(households, trace_hh_id):

    df = read_raw_persons(households)

    logger.info("loaded persons %s" % (df.shape, ))

    # replace table function with dataframe
    inject.add_table('persons', df)

    pipeline.get_rn_generator().add_channel('persons', df)

    if trace_hh_id:
        tracing.register_traceable_table('persons', df)
        tracing.trace_df(df, "raw.persons", warn_if_empty=True)

    return df
Example #21
def atwork_subtour_destination(tours, persons_merged, skim_dict, skim_stack,
                               land_use, size_terms, chunk_size, trace_hh_id):

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    destination_size_terms = tour_destination_size_terms(
        land_use, size_terms, 'atwork')

    destination_sample = atwork_subtour_destination_sample(
        subtours, persons_merged, skim_dict, destination_size_terms,
        chunk_size, trace_hh_id)

    destination_sample = atwork_subtour_destination_logsums(
        persons_merged, destination_sample, skim_dict, skim_stack, chunk_size,
        trace_hh_id)

    choices = atwork_subtour_destination_simulate(subtours, persons_merged,
                                                  destination_sample,
                                                  skim_dict,
                                                  destination_size_terms,
                                                  chunk_size, trace_hh_id)

    subtours['destination'] = choices

    assign_in_place(tours, subtours[['destination']])

    pipeline.replace_table("tours", tours)

    tracing.print_summary('subtour destination',
                          subtours.destination,
                          describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
Example #22
def mandatory_tour_frequency(persons_merged,
                             mandatory_tour_frequency_spec,
                             mandatory_tour_frequency_settings,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    choices = asim.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        trace_label=trace_hh_id and 'mandatory_tour_frequency',
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices, value_counts=True)

    orca.add_column("persons", "mandatory_tour_frequency", choices)
    pipeline.add_dependent_columns("persons", "persons_mtf")

    create_mandatory_tours_table()

    # FIXME - test prng repeatability
    r = pipeline.get_rn_generator().random_for_df(choices)
    orca.add_column("persons", "mtf_rand", [item for sublist in r for item in sublist])

    if trace_hh_id:
        trace_columns = ['mandatory_tour_frequency']
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="mandatory_tour_frequency",
                         columns=trace_columns,
                         warn_if_empty=True)
Example #23
def write_trace_data(trace_results, trace_zones, zones, trace_assigned_locals,
                     trace_zone_rows):
    if trace_results is None:
        logger.warning('trace_zones not found in zones = %s' % (trace_zones,))
        return

    df = zones.loc[zones[trace_zone_rows].index]
    df = df.merge(trace_results, how='left', left_index=True, right_index=True)

    tracing.trace_df(df,
                     label='buffered_zones',
                     index_label='None',
                     slicer='NONE',
                     warn_if_empty=True)

    if trace_assigned_locals:
        tracing.write_locals(df, file_name='netbuffer_locals')
Example #24
def trip_purpose(
        trips,
        chunk_size,
        trace_hh_id):

    """
    trip purpose model step - calls run_trip_purpose to run the actual model

    adds purpose column to trips
    """
    trace_label = "trip_purpose"

    trips_df = trips.to_frame()

    estimator = estimation.manager.begin_estimation('trip_purpose')
    if estimator:
        chooser_cols_for_estimation = ['person_id', 'household_id', 'tour_id', 'trip_num']
        estimator.write_choosers(trips_df[chooser_cols_for_estimation])

    choices = run_trip_purpose(
        trips_df,
        estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label
    )

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'trips', 'purpose')  # override choices
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    trips_df['purpose'] = choices

    # we should have assigned a purpose to all trips
    assert not trips_df.purpose.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example #25
def choose_intermediate_trip_purpose(trips, probs_spec, trace_hh_id, trace_label):
    """
    choose purpose for intermediate trips based on probs_spec
    which assigns relative weights (summing to 1) to the possible purpose choices

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    probs_join_cols = ['primary_purpose', 'outbound', 'person_type']
    non_purpose_cols = probs_join_cols + ['depart_range_start', 'depart_range_end']
    purpose_cols = [c for c in probs_spec.columns if c not in non_purpose_cols]

    num_trips = len(trips.index)
    have_trace_targets = trace_hh_id and tracing.has_trace_targets(trips)

    # probs should sum to 1 across rows
    sum_probs = probs_spec[purpose_cols].sum(axis=1)
    probs_spec.loc[:, purpose_cols] = probs_spec.loc[:, purpose_cols].div(sum_probs, axis=0)

    # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
    choosers = pd.merge(trips.reset_index(), probs_spec, on=probs_join_cols,
                        how='left').set_index('trip_id')

    chunk.log_df(trace_label, 'choosers', choosers)

    # select the matching depart range (this should result in exactly one chooser row per trip)
    choosers = choosers[(choosers.start >= choosers['depart_range_start']) &
                        (choosers.start <= choosers['depart_range_end'])]

    # choosers should now match trips row for row
    assert choosers.index.is_unique
    assert len(choosers.index) == num_trips

    choices, rands = logit.make_choices(
        choosers[purpose_cols],
        trace_label=trace_label, trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(choices, '%s.choices' % trace_label, columns=[None, 'trip_purpose'])
        tracing.trace_df(rands, '%s.rands' % trace_label, columns=[None, 'rand'])

    choices = choices.map(pd.Series(purpose_cols))
    return choices
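
Two pandas idioms above are worth unpacking: div(row_sums, axis=0) turns relative weights into row-wise probabilities, and map(pd.Series(purpose_cols)) translates positional choices back into purpose labels. A toy sketch with made-up purposes:

import pandas as pd

probs = pd.DataFrame({'shopping': [2.0, 1.0], 'eatout': [1.0, 1.0], 'social': [1.0, 2.0]})
probs = probs.div(probs.sum(axis=1), axis=0)
print(probs.sum(axis=1).tolist())  # [1.0, 1.0]: each row now sums to 1

choices = pd.Series([0, 2], index=[11, 12])  # positional choices per trip_id
print(choices.map(pd.Series(probs.columns)).tolist())  # ['shopping', 'social']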
Example #26
def persons(households, trace_hh_id):

    df = read_raw_persons(households)

    logger.info("loaded persons %s" % (df.shape,))

    df.index.name = 'person_id'

    # replace table function with dataframe
    inject.add_table('persons', df)

    pipeline.get_rn_generator().add_channel('persons', df)

    if trace_hh_id:
        tracing.register_traceable_table('persons', df)
        tracing.trace_df(df, "raw.persons", warn_if_empty=True)

    return df
Example #27
def individual_utilities(persons,
                         cdap_indiv_spec,
                         locals_d,
                         trace_hh_id=None,
                         trace_label=None):
    """
    Calculate CDAP utilities for all individuals.

    Parameters
    ----------
    persons : pandas.DataFrame
        DataFrame of individual persons data.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec applied to individuals.

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `persons` and columns for each of the alternatives,
        plus some 'useful columns' [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]

    """

    # calculate single person utilities
    individual_vars = eval_variables(cdap_indiv_spec.index, persons, locals_d)
    indiv_utils = individual_vars.dot(cdap_indiv_spec)

    # add columns from persons to facilitate building household interactions
    useful_columns = [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    indiv_utils[useful_columns] = persons[useful_columns]

    # if DUMP:
    #     tracing.trace_df(indiv_utils, '%s.DUMP.indiv_utils' % trace_label,
    #                      transpose=False, slicer='NONE')

    if trace_hh_id:
        tracing.trace_df(individual_vars,
                         '%s.individual_vars' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(indiv_utils,
                         '%s.indiv_utils' % trace_label,
                         column_labels=['activity', 'person'])

    return indiv_utils
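
The two-step pattern above is a plain linear utility computation: eval_variables produces a matrix of expression values per person, and the dot product with the spec yields one utility column per alternative. A minimal sketch with a hypothetical two-expression, two-alternative spec:

import pandas as pd

# value of each spec expression, evaluated per person (stand-in for eval_variables)
individual_vars = pd.DataFrame({'is_worker': [1, 0], 'age_lt_18': [0, 1]},
                               index=pd.Index([1, 2], name='person_id'))

# coefficient of each expression (rows) for each alternative (columns)
cdap_indiv_spec = pd.DataFrame({'M': [2.0, -1.0], 'N': [0.5, 1.5]},
                               index=['is_worker', 'age_lt_18'])

indiv_utils = individual_vars.dot(cdap_indiv_spec)
print(indiv_utils)  # person 1 gets M=2.0, N=0.5; person 2 gets M=-1.0, N=1.5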
Example #28
def persons(households, trace_hh_id):

    df = read_raw_persons(households)

    logger.info("loaded persons %s" % (df.shape, ))

    # replace table function with dataframe
    inject.add_table('persons', df)

    pipeline.get_rn_generator().add_channel('persons', df)

    tracing.register_traceable_table('persons', df)
    if trace_hh_id:
        tracing.trace_df(df, "raw.persons", warn_if_empty=True)

    logger.debug(
        f"{len(df.household_id.unique())} unique household_ids in persons")
    logger.debug(
        f"{len(households.index.unique())} unique household_ids in households")
    assert not households.index.duplicated().any()
    assert not df.index.duplicated().any()

    persons_without_households = ~df.household_id.isin(households.index)
    if persons_without_households.any():
        logger.error(
            f"{persons_without_households.sum()} persons out of {len(persons)} without households\n"
            f"{pd.Series({'person_id': persons_without_households.index.values})}"
        )
        raise RuntimeError(
            f"{persons_without_households.sum()} persons with bad household_id"
        )

    households_without_persons = df.groupby('household_id').size().reindex(
        households.index).isnull()
    if households_without_persons.any():
        logger.error(
            f"{households_without_persons.sum()} households out of {len(households.index)} without  persons\n"
            f"{pd.Series({'household_id': households_without_persons.index.values})}"
        )
        raise RuntimeError(
            f"{households_without_persons.sum()} households with no persons")

    return df
Example #29
def non_mandatory_tour_destination(tours, persons_merged, skim_dict,
                                   skim_stack, chunk_size, trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings(
        'non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    choices = tour_destination.run_tour_destination(tours, persons_merged,
                                                    model_settings, skim_dict,
                                                    skim_stack, chunk_size,
                                                    trace_hh_id, trace_label)

    non_mandatory_tours['destination'] = choices

    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
Example #30
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    model_name = 'non_mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    tour_segment_col = None

    choices = run_tour_scheduling(model_name, non_mandatory_tours,
                                  persons_merged, tdd_alts, tour_segment_col,
                                  chunk_size, trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Example #31
def persons(store, households_sample_size, households, trace_hh_id):

    df = store["persons"]

    if households_sample_size > 0:
        # keep all persons in the sampled households
        df = df[df.household_id.isin(households.index)]

    logger.info("loaded persons %s" % (df.shape, ))

    # replace table function with dataframe
    orca.add_table('persons', df)

    pipeline.get_rn_generator().add_channel(df, 'persons')

    if trace_hh_id:
        tracing.register_traceable_table('persons', df)
        tracing.trace_df(df, "persons", warn_if_empty=True)

    return df
Example #32
def households(store, households_sample_size, trace_hh_id):

    df_full = store["households"]

    # if we are tracing hh exclusively
    if trace_hh_id and households_sample_size == 1:

        # df contains only trace_hh (or empty if not in full store)
        df = tracing.slice_ids(df_full, trace_hh_id)

    # if we need to sample a subset of the full store
    elif households_sample_size > 0 and df_full.shape[0] > households_sample_size:

        logger.info("sampling %s of %s households" % (households_sample_size, df_full.shape[0]))

        # take the requested random sample
        df = asim.random_rows(df_full, households_sample_size)

        # if tracing and we missed trace_hh in sample, but it is in full store
        if trace_hh_id and trace_hh_id not in df.index and trace_hh_id in df_full.index:
            # replace first hh in sample with trace_hh
            logger.debug("replacing household %s with %s in household sample" %
                         (df.index[0], trace_hh_id))
            df_hh = tracing.slice_ids(df_full, trace_hh_id)
            df = pd.concat([df_hh, df[1:]])

    else:
        df = df_full

    logger.info("loaded households %s" % (df.shape,))

    # replace table function with dataframe
    inject.add_table('households', df)

    pipeline.get_rn_generator().add_channel(df, 'households')

    if trace_hh_id:
        tracing.register_traceable_table('households', df)
        tracing.trace_df(df, "households", warn_if_empty=True)

    return df
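
The sampling fallback above guarantees the traced household survives sampling: if the random sample missed trace_hh_id but the full store contains it, the first sampled row is swapped out for it. A toy sketch of that swap:

import pandas as pd

df_full = pd.DataFrame({'income': [10, 20, 30, 40]},
                       index=pd.Index([1, 2, 3, 4], name='household_id'))
df = df_full.loc[[2, 3]]  # pretend random sample that missed household 4
trace_hh_id = 4

if trace_hh_id not in df.index and trace_hh_id in df_full.index:
    df_hh = df_full.loc[[trace_hh_id]]
    df = pd.concat([df_hh, df[1:]])  # replace first sampled hh with the trace hh

print(df.index.tolist())  # [4, 3]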
Example #33
def auto_ownership_simulate(households, households_merged, chunk_size,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label,
                len(households_merged))

    model_spec = simulate.read_model_spec(file_name='auto_ownership.csv')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(choosers=households_merged.to_frame(),
                                       spec=model_spec,
                                       nest_spec=nest_spec,
                                       locals_d=constants,
                                       chunk_size=chunk_size,
                                       trace_label=trace_label,
                                       trace_choice_name='auto_ownership')

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership',
                          households.auto_ownership,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label='auto_ownership',
                         warn_if_empty=True)
Example #34
def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label, len(households_merged))

    model_spec = simulate.read_model_spec(file_name='auto_ownership.csv')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=households_merged.to_frame(),
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership')

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label='auto_ownership',
                         warn_if_empty=True)
Example #35
def create_non_mandatory_tours(trace_hh_id):
    """
    We have now generated non-mandatory tours, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)
    """

    persons = inject.get_table('persons')
    alts = inject.get_injectable('non_mandatory_tour_frequency_alts')

    non_mandatory_tours = process_non_mandatory_tours(
        persons.non_mandatory_tour_frequency.dropna(),
        alts
    )

    tours = pipeline.extend_table("tours", non_mandatory_tours)
    tracing.register_traceable_table('tours', tours)
    pipeline.get_rn_generator().add_channel(non_mandatory_tours, 'tours')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)
Example #36
def extend_tour_counts(persons, tour_counts, alternatives, trace_hh_id, trace_label):
    """
    extend tour counts based on a probability table

    counts can only be extended if original count is between 1 and 4
    and tours can only be extended if their count is at the max possible
    (e.g. 2 for escort, 1 otherwise) so escort might be increased to 3 or 4
    and other tour types might be increased to 2 or 3

    Parameters
    ----------
    persons: pandas dataframe
        (need this for join columns)
    tour_counts: pandas dataframe
        one row per person, one column per tour_type
    alternatives
        alternatives from nmtv interaction_simulate
        only need this to know max possible frequency for a tour type
    trace_hh_id
    trace_label

    Returns
    -------
    extended tour_counts


    tour_counts looks like this:
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0

    """

    assert tour_counts.index.name == persons.index.name

    PROBABILITY_COLUMNS = ['0_tours', '1_tours', '2_tours']
    JOIN_COLUMNS = ['ptype', 'has_mandatory_tour', 'has_joint_tour']
    TOUR_TYPE_COL = 'nonmandatory_tour_type'

    probs_spec = extension_probs()
    persons = persons[JOIN_COLUMNS]

    # only extend if there are 1 - 4 non_mandatory tours to start with
    extend_tour_counts = tour_counts.sum(axis=1).between(1, 4)
    if not extend_tour_counts.any():
        return tour_counts

    have_trace_targets = trace_hh_id and tracing.has_trace_targets(extend_tour_counts)

    for i, tour_type in enumerate(alternatives.columns):

        i_tour_type = i + 1  # (probs_spec nonmandatory_tour_type column is 1-based)
        tour_type_trace_label = tracing.extend_trace_label(trace_label, tour_type)

        # - only extend tour if frequency is max possible frequency for this tour type
        tour_type_is_maxed = \
            extend_tour_counts & (tour_counts[tour_type] == alternatives[tour_type].max())
        maxed_tour_count_idx = tour_counts.index[tour_type_is_maxed]

        if len(maxed_tour_count_idx) == 0:
            continue

        # - get extension probs for tour_type
        choosers = pd.merge(
            persons.loc[maxed_tour_count_idx],
            probs_spec[probs_spec[TOUR_TYPE_COL] == i_tour_type],
            on=JOIN_COLUMNS,
            how='left'
        ).set_index(maxed_tour_count_idx)
        assert choosers.index.name == tour_counts.index.name

        # - random choice of extension magnitude based on relative probs
        choices, rands = logit.make_choices(
            choosers[PROBABILITY_COLUMNS],
            trace_label=tour_type_trace_label,
            trace_choosers=choosers)

        # - extend tour_count (0-based prob alternative choice equals magnitude of extension)
        if choices.any():
            tour_counts.loc[choices.index, tour_type] += choices

        if have_trace_targets:
            tracing.trace_df(choices,
                             tracing.extend_trace_label(tour_type_trace_label, 'choices'),
                             columns=[None, 'choice'])
            tracing.trace_df(rands,
                             tracing.extend_trace_label(tour_type_trace_label, 'rands'),
                             columns=[None, 'rand'])

    return tour_counts
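
The gating logic above combines two boolean masks: a person is eligible only if their total tour count is between 1 and 4, and a given tour type is extendable only when its count already equals the maximum frequency in the alternatives table. A small standalone sketch (toy counts, with an assumed maximum of 2 escort tours):

import pandas as pd

tour_counts = pd.DataFrame({'escort': [2, 0, 5], 'shopping': [0, 1, 0]},
                           index=pd.Index([1, 2, 3], name='parent_id'))
max_escort = 2  # hypothetical max from the alternatives table

extendable = tour_counts.sum(axis=1).between(1, 4)  # [True, True, False]
escort_is_maxed = extendable & (tour_counts['escort'] == max_escort)
print(escort_is_maxed.tolist())  # [True, False, False]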
Example #37
def iterate_location_choice(
        model_settings,
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor,
        trace_label):
    """
    iterate run_location_choice updating shadow pricing until convergence criteria satisfied
    or max_iterations reached.

    (If use_shadow_pricing is not enabled, then just iterate once)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    skim_dict : skim.SkimDict
    skim_stack : skim.SkimStack
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    adds choice column model_settings['DEST_CHOICE_COLUMN_NAME'] and annotations to persons table
    """

    # column containing segment id
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    # boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    persons_merged_df = persons_merged.to_frame()

    persons_merged_df = persons_merged_df[persons_merged_df[chooser_filter_column]]

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations

    logger.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    choices = None
    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices = run_location_choice(
            persons_merged_df,
            skim_dict, skim_stack,
            spc,
            model_settings,
            chunk_size, trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, 'i%s' % iteration))

        choices_df = choices.to_frame('dest_choice')
        choices_df['segment_id'] = \
            persons_merged_df[chooser_segment_column].reindex(choices_df.index)

        spc.set_choices(choices_df)

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logging.info("%s converged after iteration %s" % (trace_label, iteration,))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    tracing.print_summary(dest_choice_column_name, choices, value_counts=True)

    persons_df = persons.to_frame()

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    NO_DEST_TAZ = -1
    persons_df[dest_choice_column_name] = \
        choices.reindex(persons_df.index).fillna(NO_DEST_TAZ).astype(int)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

        pipeline.replace_table("persons", persons_df)

        if trace_hh_id:
            tracing.trace_df(persons_df,
                             label=trace_label,
                             warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    return persons_df
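
# A small sketch of the backfill step above: choices exist only for persons who
# were modeled, and everyone else is coded NO_DEST_TAZ. Toy ids, plain pandas.
import pandas as pd

NO_DEST_TAZ = -1
persons_idx = pd.Index([1, 2, 3], name='person_id')
choices = pd.Series({1: 17, 3: 42})  # person 2 had no location modeled

dest = choices.reindex(persons_idx).fillna(NO_DEST_TAZ).astype(int)
# person 1 -> 17, person 2 -> -1, person 3 -> 42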
Exemple #38
def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                            trace_hh_id, trace_label):
    """
    Generate the activity choices for the 'extra' household members who weren't handled by cdap

    Following the CTRAMP HouseholdCoordinatedDailyActivityPatternModel, "a separate,
    simple cross-sectional distribution is looked up for the remaining household members"

    The cdap_fixed_relative_proportions spec is handled like an activitysim logit utility spec,
    EXCEPT that the values computed are relative proportions, not utilities
    (i.e. values are not exponentiated before being normalized to probabilities summing to 1.0)

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data indexed on _persons_index_.
        We expect, at least, columns [_hh_id_, _ptype_]
    cdap_fixed_relative_proportions
        spec to compute/specify the relative proportions of each activity (M, N, H)
        that should be used to choose activities for additional household members
        not handled by CDAP.
    locals_d : Dict
        dictionary of local variables that eval_variables adds to the environment
        for an evaluation of an expression that begins with @

    Returns
    -------
    choices : pandas.Series
        list of alternatives chosen for all extra members, indexed by _persons_index_
    """

    trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')

    # extra household members have cdap_rank > MAX_HHSIZE
    choosers = persons[persons['cdap_rank'] > MAX_HHSIZE]

    if len(choosers.index) == 0:
        return pd.Series()

    # eval the expression file
    values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)

    # cdap_fixed_relative_proportions computes relative proportions by ptype, not utilities
    proportions = values.dot(cdap_fixed_relative_proportions)

    # convert relative proportions to probability
    probs = proportions.div(proportions.sum(axis=1), axis=0)

    # select an activity pattern alternative for each person based on probability
    # idx_choices is a series (indexed on _persons_index_ ) with the chosen alternative represented
    # as the integer (0 based) index of the chosen column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice from column index to activity name
    choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)

    if trace_hh_id:
        tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(probs, '%s.extra_hh_member_choices_probs' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(choices, '%s.extra_hh_member_choices_choices' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(rands, '%s.extra_hh_member_choices_rands' % trace_label,
                         columns=[None, 'rand'])

    return choices
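
# Worked toy example of the key difference noted in the docstring above: the
# spec values are relative proportions, so they are normalized directly rather
# than exponentiated first (as a logit utility would be). Plain pandas, made-up data.
import pandas as pd

proportions = pd.DataFrame({'M': [0.2, 0.0], 'N': [0.3, 0.5], 'H': [0.5, 0.5]},
                           index=pd.Index([11, 12], name='person_id'))
probs = proportions.div(proportions.sum(axis=1), axis=0)
# person 11 -> M 0.2, N 0.3, H 0.5; person 12 -> M 0.0, N 0.5, H 0.5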
Exemple #39
def trip_mode_choice(
        trips,
        tours_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in the trip_mode_choice_coeffs.csv coefficient file).

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col, right_key=dest_col,
                                             skim_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    constants = config.get_model_constants(model_settings)
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)
        choices = simulate.simple_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        alts = model_spec.columns
        choices = choices.map(dict(list(zip(list(range(len(alts))), alts))))

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            trips_segment['trip_mode'] = choices
            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    trips_df = trips.to_frame()
    trips_df['trip_mode'] = choices

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          choices, value_counts=True)

    assert not trips_df.trip_mode.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
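
# Sketch of the index-to-label mapping used above (choices.map(dict(zip(...)))):
# simple_simulate returns 0-based column positions, which are mapped back to the
# alternative names in the spec. Toy alternatives and trip ids.
import pandas as pd

alts = ['DRIVEALONE', 'SHARED2', 'WALK']  # stand-ins for model_spec.columns
raw = pd.Series([2, 0, 1], index=pd.Index([5, 6, 7], name='trip_id'))
trip_mode = raw.map(dict(enumerate(alts)))
# trip 5 -> 'WALK', trip 6 -> 'DRIVEALONE', trip 7 -> 'SHARED2'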
Exemple #40
def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
                               trace_hh_id=None, trace_label=None):
    """
    Calculate household utilities for each activity pattern alternative for households of hhsize
    The resulting activity pattern for each household will be coded as a string of activity codes.
    e.g. 'MNHH' for a 4 person household with activities Mandatory, NonMandatory, Home, Home

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions
        indiv_utils has index of _persons_index_ and a column for each alternative
        i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)

    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes

    hhsize : int
        the size of the households for which the activity pattern should be calculated (1..MAX_HHSIZE)

    Returns
    -------
    choices : pandas.Series
        the chosen cdap activity pattern for each household represented as a string (e.g. 'MNH')
        with same index (_hh_index_) as utils

    """

    if hhsize == 1:
        # for 1 person households, there are no interactions to account for
        # and the household utils are the same as the individual utils
        choosers = vars = None
        # extract the individual utilities for individuals from hhsize 1 households
        utils = indiv_utils.loc[indiv_utils[_hh_size_] == 1, [_hh_id_, 'M', 'N', 'H']]
        # index on household_id, not person_id
        set_hh_index(utils)
    else:

        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients, hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label)

    if len(utils.index) == 0:
        return pd.Series()

    probs = logit.utils_to_probs(utils, trace_label=trace_label)

    # select an activity pattern alternative for each household based on probability
    # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice expressed as index into alternative name from util column label
    choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)

    if trace_hh_id:

        if hhsize > 1:
            tracing.trace_df(choosers, '%s.hhsize%d_choosers' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])

        tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(rands, '%s.hhsize%d_rands' % (trace_label, hhsize),
                         columns=[None, 'rand'])

    return choices
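
# Toy illustration of the utils -> probs -> choice pipeline above. utils_to_probs
# is essentially a row-wise softmax; make_choices then samples a column. Here the
# sampling is replaced by a fixed draw for reproducibility; utilities are invented.
import numpy as np
import pandas as pd

utils = pd.DataFrame({'MM': [1.0], 'MN': [0.5], 'NN': [0.0]},
                     index=pd.Index([7], name='household_id'))
exp_u = np.exp(utils)
probs = exp_u.div(exp_u.sum(axis=1), axis=0)

rand = 0.6  # a fixed draw standing in for the pipeline rng
choice_position = int(np.argmax(probs.cumsum(axis=1).values[0] > rand))
choice_label = probs.columns[choice_position]  # 'MN' for this draw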
Exemple #41
def assign_cdap_rank(persons, trace_hh_id=None, trace_label=None):
    """
    Assign an integer index, cdap_rank, to each household member. (Starting with 1, not 0)

    Modifies persons df in place

    The cdap_rank order is important, because cdap only assigns activities to the first
    MAX_HHSIZE persons in each household.

    This will preferentially be two working adults and the three youngest children.

    Rank is assigned starting at 1. This necessitates some care when indexing, but is preferred as
    it follows the convention of 1-based pnums in expression files.

    According to the documentation of reOrderPersonsForCdap in mtctm2.abm.ctramp
    HouseholdCoordinatedDailyActivityPatternModel:

    "Method reorders the persons in the household for use with the CDAP model,
    which only explicitly models the interaction of five persons in a HH. Priority
    in the reordering is first given to full time workers (up to two), then to
    part time workers (up to two workers, of any type), then to children (youngest
    to oldest, up to three). If the method is called for a household with less
    than 5 people, the cdapPersonArray is the same as the person array."

    We diverge from the above description in that a cdap_rank is assigned to all persons,
    including 'extra' household members, whose activity is assigned subsequently.
    The pair _hh_id_, cdap_rank will uniquely identify each household member.

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data. Must contain columns _hh_size_, _hh_id_, _ptype_, _age_

    Returns
    -------
    cdap_rank : pandas.Series
        integer cdap_rank of every person, indexed on _persons_index_
    """

    # transient categories used to categorize persons in cdap_rank before assigning final rank
    RANK_WORKER = 1
    RANK_CHILD = 2
    RANK_BACKFILL = 3
    RANK_UNASSIGNED = 9
    persons['cdap_rank'] = RANK_UNASSIGNED

    # choose up to 2 workers, preferring full over part, older over younger
    workers = \
        persons.loc[persons[_ptype_].isin(WORKER_PTYPES), [_hh_id_, _ptype_]]\
        .sort_values(by=[_hh_id_, _ptype_], ascending=[True, True])\
        .groupby(_hh_id_).head(2)
    # tag the selected workers
    persons.loc[workers.index, 'cdap_rank'] = RANK_WORKER
    del workers

    # choose up to 3 children, preferring youngest
    children = \
        persons.loc[persons[_ptype_].isin(CHILD_PTYPES), [_hh_id_, _ptype_, _age_]]\
        .sort_values(by=[_hh_id_, _age_], ascending=[True, True])\
        .groupby(_hh_id_).head(3)
    # tag the selected children
    persons.loc[children.index, 'cdap_rank'] = RANK_CHILD
    del children

    # choose up to MAX_HHSIZE, choosing randomly
    others = persons[[_hh_id_, 'cdap_rank']].copy()
    others['random_order'] = pipeline.get_rn_generator().random_for_df(persons)
    others = \
        others\
        .sort_values(by=[_hh_id_, 'random_order'], ascending=[True, True])\
        .groupby(_hh_id_).head(MAX_HHSIZE)

    # tag the backfilled persons
    persons.loc[others[others.cdap_rank == RANK_UNASSIGNED].index, 'cdap_rank'] \
        = RANK_BACKFILL
    del others

    # assign person number in cdapPersonArray preference order
    # i.e. convert cdap_rank from category to index in order of category rank within household
    # groupby rank() is slow, so we compute rank artisanally
    # save time by sorting only the columns we need (persons is big, and sort moves data)
    p = persons[[_hh_id_, 'cdap_rank', _age_]]\
        .sort_values(by=[_hh_id_, 'cdap_rank', _age_], ascending=[True, True, True])
    rank = p.groupby(_hh_id_).size().map(range)
    rank = [item+1 for sublist in rank for item in sublist]
    p['cdap_rank'] = rank
    persons['cdap_rank'] = p['cdap_rank']  # assignment aligns on index values

    if trace_hh_id:
        tracing.trace_df(persons, '%s.cdap_rank' % trace_label)

    return persons['cdap_rank']
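
# Toy demo of the "artisanal" rank computation above: once persons are sorted by
# household and priority, the within-household rank is just 1..n in row order.
# Plain pandas with invented ids.
import pandas as pd

p = pd.DataFrame({'household_id': [1, 1, 1, 2, 2]},
                 index=pd.Index([10, 11, 12, 13, 14], name='person_id'))
sizes = p.groupby('household_id').size().map(range)
rank = [i + 1 for sublist in sizes for i in sublist]
p['cdap_rank'] = rank  # [1, 2, 3, 1, 2]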
Exemple #42
def cdap_simulate(persons_merged, persons, households,
                  cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions,
                  chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because efficient Python requires vectorized computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    trace_label = 'cdap'
    model_settings = config.read_model_settings('cdap.yaml')

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    cdap_interaction_coefficients = \
        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

    # specs are built just-in-time on demand and cached as injectables
    # prebuilding here allows us to write them to the output directory
    # (also when multiprocessing locutor might not see all household sizes)
    logger.info("Pre-building cdap specs")
    for hhsize in range(2, cdap.MAX_HHSIZE + 1):
        spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True)
        if inject.get_injectable('locutor', False):
            spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize), index=True)

    logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

    choices = cdap.run_cdap(
        persons=persons_merged,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    # - assign results to persons table and annotate
    persons = persons.to_frame()

    choices = choices.reindex(persons.index)
    persons['cdap_activity'] = choices.cdap_activity
    persons['cdap_rank'] = choices.cdap_rank

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    # - annotate households table
    households = households.to_frame()
    expressions.assign_columns(
        df=households,
        model_settings=model_settings.get('annotate_households'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
    pipeline.replace_table("households", households)

    tracing.print_summary('cdap_activity', persons.cdap_activity, value_counts=True)
    logger.info("cdap crosstabs:\n%s" %
                pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))

    if trace_hh_id:

        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
Exemple #43
def best_transit_path(set_random_seed,
                      network_los,
                      best_transit_path_spec):

    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    omaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    dmaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    tod = np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE)
    od_df = pd.DataFrame({'omaz': omaz, 'dmaz': dmaz, 'tod': tod})

    trace_od = (od_df.omaz[0], od_df.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s" % trace_od)

    # build exploded atap_btap_df

    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    atap_btap_df = network_los.get_tappairs_mazpairs(od_df.omaz, od_df.dmaz,
                                                     ofilter='drive_time',
                                                     dfilter='walk_alighting')

    # add in tod column
    atap_btap_df = atap_btap_df.merge(
        right=od_df[['tod']],
        left_on='idx',
        right_index=True,
        how='left'
    )

    logger.info("len od_df %s", len(od_df.index))
    logger.info("len atap_btap_df %s", len(atap_btap_df.index))
    logger.info("avg explosion %s", (len(atap_btap_df.index) / (1.0 * len(od_df.index))))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_oabd_rows = (atap_btap_df.omaz == trace_orig) & (atap_btap_df.dmaz == trace_dest)
    else:
        trace_oabd_rows = None

    constants = config.get_model_constants(model_settings)
    locals_d = {
        'np': np,
        'network_los': network_los
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(best_transit_path_spec, atap_btap_df, locals_d,
                                  trace_rows=trace_oabd_rows)

    # copy results
    for column in results.columns:
        atap_btap_df[column] = results[column]

    # drop rows if no utility
    n = len(atap_btap_df.index)
    atap_btap_df = atap_btap_df.dropna(subset=['utility'])

    logger.info("Dropped %s of %s rows with null utility", n - len(atap_btap_df.index), n)

    # choose max utility
    atap_btap_df = atap_btap_df.sort_values(by='utility').groupby('idx').tail(1)

    if trace_od:

        if not trace_oabd_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s", trace_orig, trace_dest)
        else:

            tracing.trace_df(atap_btap_df,
                             label='best_transit_path',
                             slicer='NONE',
                             transpose=False)

            tracing.trace_df(trace_results,
                             label='trace_best_transit_path',
                             slicer='NONE',
                             transpose=False)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="trace_best_transit_path_locals")
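
# Small sketch of the "choose max utility" idiom used above: sorting by utility
# and taking the tail(1) of each group keeps the single best row per od pair.
# Toy data, plain pandas.
import pandas as pd

paths = pd.DataFrame({'idx': [0, 0, 1, 1, 1],
                      'utility': [1.2, 3.4, 0.5, 2.2, 2.0]})
best = paths.sort_values(by='utility').groupby('idx').tail(1)
# keeps the rows with utility 3.4 (idx 0) and 2.2 (idx 1)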
Exemple #44
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
Exemple #45
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):

    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
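
# Sketch of the repeat/tile cross-join above and of reshaping a result column
# back into an (orig, dest) matrix for the row-wise log-sum. Toy zone ids.
import numpy as np
import pandas as pd

origs = np.array([1, 2, 3])
dests = np.array([7, 8])
od_df = pd.DataFrame({'orig': np.repeat(origs, len(dests)),
                      'dest': np.tile(dests, len(origs))})

values = np.arange(len(od_df), dtype=float)  # stand-in for an assigned result column
matrix = values.reshape(len(origs), len(dests))
accessibility = np.log(matrix.sum(axis=1) + 1)  # same aggregation as above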
Exemple #46
def households(households_sample_size, override_hh_ids, trace_hh_id):

    df_full = read_input_table("households")
    households_sliced = False

    logger.info("full household list contains %s households" % df_full.shape[0])

    # only using households listed in override_hh_ids
    if override_hh_ids is not None:

        # trace_hh_id will not be used if it is not in the list of override_hh_ids
        logger.info("override household list contains %s households" % len(override_hh_ids))

        df = df_full[df_full.index.isin(override_hh_ids)]
        households_sliced = True

        if df.shape[0] < len(override_hh_ids):
            logger.info("found %s of %s households in override household list" %
                        (df.shape[0], len(override_hh_ids)))

        if df.shape[0] == 0:
            raise RuntimeError('No override households found in store')

    # if we are tracing hh exclusively
    elif trace_hh_id and households_sample_size == 1:

        # df contains only trace_hh (or empty if not in full store)
        df = tracing.slice_ids(df_full, trace_hh_id)
        households_sliced = True

    # if we need a subset of full store
    elif households_sample_size > 0 and df_full.shape[0] > households_sample_size:

        logger.info("sampling %s of %s households" % (households_sample_size, df_full.shape[0]))

        """
        Because random seed is set differently for each step, sampling of households using
        Random.global_rng would sample differently depending upon which step it was called from.
        We use a one-off rng seeded with the pseudo step name 'sample_households' to provide
        repeatable sampling no matter when the table is loaded.

        Note that the external_rng is also seeded with base_seed so the sample will (rightly) change
        if the pipeline rng's base_seed is changed
        """

        prng = pipeline.get_rn_generator().get_external_rng('sample_households')
        df = df_full.take(prng.choice(len(df_full), size=households_sample_size, replace=False))
        households_sliced = True

        # if tracing and we missed trace_hh in sample, but it is in full store
        if trace_hh_id and trace_hh_id not in df.index and trace_hh_id in df_full.index:
            # replace first hh in sample with trace_hh
            logger.debug("replacing household %s with %s in household sample" %
                         (df.index[0], trace_hh_id))
            df_hh = df_full.loc[[trace_hh_id]]
            df = pd.concat([df_hh, df[1:]])

    else:
        df = df_full

    # record whether households were sliced, so the persons table can be sliced to match
    inject.add_injectable('households_sliced', households_sliced)

    logger.info("loaded households %s" % (df.shape,))

    df.index.name = 'household_id'

    # FIXME - pathological knowledge of name of chunk_id column used by chunked_choosers_by_chunk_id
    assert 'chunk_id' not in df.columns
    df['chunk_id'] = pd.Series(list(range(len(df))), df.index)

    # replace table function with dataframe
    inject.add_table('households', df)

    pipeline.get_rn_generator().add_channel('households', df)

    if trace_hh_id:
        tracing.register_traceable_table('households', df)
        tracing.trace_df(df, "raw.households", warn_if_empty=True)

    return df
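
# Sketch of the repeatable sampling above. A seeded numpy Generator stands in
# for pipeline.get_rn_generator().get_external_rng('sample_households'); the
# households and seed are invented.
import numpy as np
import pandas as pd

df_full = pd.DataFrame({'income': range(10)},
                       index=pd.Index(range(100, 110), name='household_id'))
prng = np.random.default_rng(42)
df = df_full.take(prng.choice(len(df_full), size=3, replace=False))
# the same seed always yields the same 3-household sample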
Exemple #47
def tour_mode_choice_simulate(tours, persons_merged,
                              skim_dict, skim_stack,
                              chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type, value_counts=True)

    primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id',
                                    right_index=True, how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices_list = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        choices = run_tour_mode_choice_simulate(
            segment,
            spec, tour_type, model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
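
# Minimal sketch of the segment-and-concat pattern above: run a model per
# tour_type group, collect the per-segment choices, and concat them back into
# one Series aligned to the original index. Toy tours; the per-segment "model"
# here just echoes the segment name.
import pandas as pd

tours = pd.DataFrame({'tour_type': ['work', 'shop', 'work']},
                     index=pd.Index([1, 2, 3], name='tour_id'))
choices_list = []
for tour_type, segment in tours.groupby('tour_type'):
    choices_list.append(pd.Series(tour_type, index=segment.index))
choices = pd.concat(choices_list).sort_index()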
Exemple #48
def atwork_subtour_mode_choice(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    subtours_merged = \
        pd.merge(subtours, persons_merged.to_frame(),
                 left_on='person_id', right_index=True, how='left')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d subtours" % (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type, value_counts=True)

    # setup skim keys
    orig_col_name = 'workplace_taz'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices = run_tour_mode_choice_simulate(
        subtours_merged,
        spec, tour_purpose='atwork', model_settings=model_settings,
        skims=skims,
        constants=constants,
        nest_spec=nest_spec,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label, choices, value_counts=True)

    assign_in_place(tours, choices.to_frame('tour_mode'))
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
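
# Rough sketch of what assign_in_place accomplishes above: the choices Series is
# aligned on index into the full tours table, adding (or overwriting) the column
# only for the rows that were modeled. Plain pandas, toy ids; not the actual
# activitysim helper.
import pandas as pd

tours = pd.DataFrame({'tour_category': ['atwork', 'work']},
                     index=pd.Index([1, 2], name='tour_id'))
choices = pd.Series({1: 'WALK'}, name='tour_mode')
tours.loc[choices.index, 'tour_mode'] = choices
# tour 1 gets 'WALK'; tour 2 keeps NaN since no atwork choice was made for it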
Exemple #49
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory tours
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these tours include escort, shopping, othmaint,
    othdiscr, eatout, and social tours in various combinations.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings = config.read_model_settings('non_mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='non_mandatory_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('non_mandatory_tour_frequency_alternatives.csv'),
        set_index=None)

    choosers = persons_merged.to_frame()

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for ptype, segment in choosers.groupby('ptype'):

        name = PTYPE_NAME[ptype]

        # pick the spec column for the segment
        spec = model_spec[[name]]

        # drop any zero-valued rows
        spec = spec[spec[name] != 0]

        logger.info("Running segment '%s' of size %d", name, len(segment))

        choices = interaction_simulate(
            segment,
            alternatives,
            spec=spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        # FIXME - force garbage collection?
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    del alternatives['tot_tours']  # del tot_tours column we added above

    # - add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    # (we expect there to be an alt with no tours - which we can use to backfill non-travelers)
    no_tours_alt = alternatives.index[alternatives.sum(axis=1) == 0][0]
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tours, but they are attributes of the person table.
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)
    """

    # - get counts of each of the alternatives (so we can extend)
    # (choices is just the index values for the chosen alts)
    """
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """
    tour_counts = alternatives.loc[choices]
    tour_counts.index = choices.index  # assign person ids to the index

    prev_tour_count = tour_counts.sum().sum()

    # - extend_tour_counts
    tour_counts = extend_tour_counts(choosers, tour_counts, alternatives,
                                     trace_hh_id,
                                     tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    extended_tour_count = tour_counts.sum().sum()

    logging.info("extend_tour_counts increased nmtf tour count by %s from %s to %s" %
                 (extended_tour_count - prev_tour_count, prev_tour_count, extended_tour_count))

    # - create the non_mandatory tours
    non_mandatory_tours = process_non_mandatory_tours(persons, tour_counts)
    assert len(non_mandatory_tours) == extended_tour_count

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
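
# Toy demo of the tour_counts construction above: choices holds the index value
# of each person's chosen alternative row, so alternatives.loc[choices] expands
# it into per-person tour counts, re-keyed by person id. Invented data.
import pandas as pd

alternatives = pd.DataFrame({'escort': [0, 1, 2], 'shopping': [0, 1, 0]})
choices = pd.Series([2, 0], index=pd.Index([2588676, 2588677], name='person_id'))
tour_counts = alternatives.loc[choices]
tour_counts.index = choices.index
# 2588676 -> escort 2, shopping 0; 2588677 -> escort 0, shopping 0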
Exemple #50
def build_cdap_spec(interaction_coefficients, hhsize,
                    trace_spec=False, trace_label=None, cache=True):
    """
    Build a spec file for computing utilities of alternative household member interaction patterns
    for households of specified size.

    We generate this spec automatically from a table of rules and coefficients because the
    interaction rules are fairly simple and can be expressed compactly, whereas
    there is a lot of redundancy between the spec files for different household sizes, as well as
    in the vectorized expression of the interaction alternatives within the spec file itself.

    interaction_coefficients has five columns:
        activity
            A single character activity type name (M, N, or H)
        interaction_ptypes
            List of ptypes in the interaction (in order of increasing ptype) or empty for wildcards
            (meaning that the interaction applies to all ptypes in that size hh)
        cardinality
            the number of persons in the interaction (e.g. 3 for a 3-way interaction)
        slug
            a human friendly efficient name so we can dump a readable spec trace file for debugging
            this slug is replaced with the numerical coefficient value after we dump the trace file
        coefficient
            The coefficient to apply for all hh interactions for this activity and set of ptypes

    The generated spec will have the eval expression in the index, and a utility column for each
    alternative (e.g. ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN'] for hhsize 2)

    In order to be able to dump the spec in a human-friendly fashion to facilitate debugging the
    cdap_interaction_coefficients table, we first populate utility columns in the spec file
    with the coefficient slugs, dump the spec file, and then replace the slugs with coefficients.

    Parameters
    ----------
    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    hhsize : int
        household size for which the spec should be built.

    Returns
    -------
    spec: pandas.DataFrame

    """

    t0 = tracing.print_elapsed_time()

    # cdap spec is same for all households of MAX_HHSIZE and greater
    hhsize = min(hhsize, MAX_HHSIZE)

    if cache:
        spec = get_cached_spec(hhsize)
        if spec is not None:
            return spec

    expression_name = "Expression"

    # generate a list of activity pattern alternatives for this hhsize
    # e.g. ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN'] for hhsize=2
    alternatives = [''.join(tup) for tup in itertools.product('HMN', repeat=hhsize)]

    # spec df has expression column plus a column for each alternative
    spec = pd.DataFrame(columns=[expression_name] + alternatives)

    # Before processing the interaction_coefficients, we add rows to the spec to carry
    # the alternative utilities previously computed for each individual into all hh alternative
    # columns in which the individual is assigned that alternative. The Expression column contains
    # the name of the choosers column with that individual's utility for the individual alternative
    # and the hh alternative columns that should receive that utility are given a value of 1
    # e.g. M_p1 is a column in choosers with the individual utility to person p1 of alternative M
    #   Expression   MM   MN   MH   NM   NN   NH   HM   HN   HH
    #         M_p1  1.0  1.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0
    #         N_p1  0.0  0.0  0.0  1.0  1.0  1.0  0.0  0.0  0.0
    for pnum in range(1, hhsize+1):
        for activity in ['M', 'N', 'H']:

            new_row_index = len(spec)
            spec.loc[new_row_index, expression_name] = add_pn(activity, pnum)

            # list of alternative columns where person pnum has expression activity
            # e.g. for M_p1 we want the columns where activity M is in position p1
            alternative_columns = [alt for alt in alternatives if alt[pnum - 1] == activity]
            spec.loc[new_row_index, alternative_columns] = 1

    # ignore rows whose cardinality exceeds hhsize
    relevant_rows = interaction_coefficients.cardinality <= hhsize

    # for each row in the interaction_coefficients table
    for row in interaction_coefficients[relevant_rows].itertuples():

        # if it is a wildcard all_people interaction
        if not row.interaction_ptypes:

            # wildcard interactions only apply if the interaction includes all household members
            # this will be the case if the cardinality of the wildcard equals the hhsize
            # conveniently, for wildcards, the slug has been assigned the name of the alternative
            # column it applies to (e.g. HHHH), since the interaction includes all household
            # members and there are no ptypes to append to it

            # FIXME - should we be doing this for greater than MAX_HHSIZE households?
            if row.slug in alternatives:
                spec.loc[len(spec), [expression_name, row.slug]] = ['1', row.slug]

            continue

        if not (0 <= row.cardinality <= MAX_INTERACTION_CARDINALITY):
            raise RuntimeError("Bad row cardinality %d for %s" % (row.cardinality, row.slug))

        # for all other interaction rules, we need to generate a row in the spec for each
        # possible combination of interacting persons
        # e.g. for (1, 2), (1,3), (2,3) for a coefficient with cardinality 2 in hhsize 3
        for tup in itertools.combinations(list(range(1, hhsize+1)), row.cardinality):

            # determine the name of the chooser column with the ptypes for this interaction
            if row.cardinality == 1:
                interaction_column = "ptype_p%d" % tup[0]
            else:
                # column named (e.g.) p1_p3 for an interaction between p1 and p3
                interaction_column = '_'.join(['p%s' % pnum for pnum in tup])

            # build expression that evaluates True iff the interaction is between specified ptypes
            # (e.g.) p1_p3==13 for an interaction between p1 and p3 of ptypes 1 and 3 (or 3 and 1)
            expression = "%s==%s" % (interaction_column, row.interaction_ptypes)

            # create list of columns with names matching activity for each of the persons in tup
            # e.g. ['MMM', 'MMN', 'MMH'] for an interaction between p1 and p3 with activity 'M'
            alternative_columns = \
                [alt for alt in alternatives if all([alt[p - 1] == row.activity for p in tup])]

            # a row for this interaction may already exist,
            # e.g. if there are rules for both HH13 and MM13, we don't need to add rows for both
            # since they are triggered by the same expressions (e.g. p1_p2==13, p1_p3=13,...)
            existing_row_index = (spec[expression_name] == expression)
            if existing_row_index.any():
                # if the rows exist, simply update the appropriate alternative columns in spec
                spec.loc[existing_row_index, alternative_columns] = row.slug
                spec.loc[existing_row_index, expression_name] = expression
            else:
                # otherwise, add a new row to spec
                new_row_index = len(spec)
                spec.loc[new_row_index, alternative_columns] = row.slug
                spec.loc[new_row_index, expression_name] = expression

    # eval expression goes in the index
    spec.set_index(expression_name, inplace=True)

    simulate.uniquify_spec_index(spec)

    if trace_spec:
        tracing.trace_df(spec, '%s.hhsize%d_spec' % (trace_label, hhsize),
                         transpose=False, slicer='NONE')

    # replace slug with coefficient
    d = interaction_coefficients.set_index('slug')['coefficient'].to_dict()
    for c in spec.columns:
        spec[c] =\
            spec[c].map(lambda x: d.get(x, x or 0.0)).fillna(0)

    if trace_spec:
        tracing.trace_df(spec, '%s.hhsize%d_spec_patched' % (trace_label, hhsize),
                         transpose=False, slicer='NONE')

    if cache:
        cache_spec(hhsize, spec)

    t0 = tracing.print_elapsed_time("build_cdap_spec hh_size %s" % hhsize, t0)

    return spec
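
# Quick sketch of the alternative-generation step above: household activity
# patterns are all hhsize-length strings over 'HMN', and a person/activity row
# gets a 1 in every column where that person holds that activity.
import itertools

hhsize = 2
alternatives = [''.join(tup) for tup in itertools.product('HMN', repeat=hhsize)]
# ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']

# columns where person 1 (position 0) does activity 'M', i.e. where M_p1 applies
m_p1_columns = [alt for alt in alternatives if alt[0] == 'M']
# ['MH', 'MM', 'MN']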
Exemple #51
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory tours (alternatives are
    read from a csv file) - these tours include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the chosen index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as for non_mandatory_tours except tour types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
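

# Illustrative sketch (not from the source): how simple_simulate's positional
# choice indexes are converted to alternative names via model_spec.columns, as
# done in mandatory_tour_frequency above. The alternative names are hypothetical.
import pandas as pd

model_spec_columns = pd.Index(['work1', 'school1', 'work_and_school'])
raw_choices = pd.Series([0, 2, 1], index=[11, 12, 13])  # person ids
named_choices = pd.Series(model_spec_columns[raw_choices.values],
                          index=raw_choices.index)
# person 11 -> 'work1', person 12 -> 'work_and_school', person 13 -> 'school1'
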
def trip_purpose_and_destination(
        trips,
        tours_merged,
        chunk_size,
        trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME - could allow MAX_ITERATIONS=0 for a cleanup-only run,
    # in which case we would need to drop bad trips WITHOUT failing bad_trip leg_mates
    assert MAX_ITERATIONS > 0

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info('trip_destination has already been run. Rerunning failed trips')
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" % (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
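

# Illustrative sketch (not from the source): the retry bookkeeping used above,
# with a toy stand-in for run_trip_purpose_and_destination (trip 2 fails on the
# first pass and succeeds on the second). Leg-mate flagging is omitted here.
import pandas as pd

trips = pd.DataFrame({'failed': [False, True, False]}, index=[1, 2, 3])
results, i, MAX_ITERATIONS = [], 0, 5
while True:
    i += 1
    trips['failed'] = trips['failed'] & (i < 2)  # toy model call
    if trips.failed.sum() == 0 or i >= MAX_ITERATIONS:
        results.append(trips)
        break
    results.append(trips[~trips.failed])  # bank the good trips
    trips = trips[trips.failed]           # retry only the failed ones
results = pd.concat(results)              # trips 1 and 3, then trip 2
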
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings = config.read_model_settings('joint_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_joint.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours may each have different participants,
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build the timetable
    # by household, or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts,
        spec=model_spec,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
Exemple #54
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to the tours table.

    Creates a trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()

    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()
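    # (note) -1 is the "no zone assigned" code used elsewhere in this listing
    # (e.g. the school location backfill fills non-students with -1), so these
    # asserts guard against tours whose upstream location choice failed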

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logger.info("%s running segment %s with %s chooser rows" %
                    (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if preprocessor_settings:
            # annotations is only defined if the preprocessor ran above
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
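

# Illustrative sketch (not from the source): turning a stop_frequency label such
# as '2out_0in' into trip counts, per the docstring above. The label format is
# assumed from the '2out_0in' example; process_trips is the real implementation.
import re

label = '2out_0in'
m = re.fullmatch(r'(\d+)out_(\d+)in', label)
out_stops, in_stops = int(m.group(1)), int(m.group(2))
outbound_trips = out_stops + 1  # stops + 1 legs = 3 outbound trips
inbound_trips = in_stops + 1    # 1 inbound trip
assert outbound_trips + inbound_trips == 4
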
def school_location_simulate(persons_merged, school_location_sample,
                             school_location_spec, school_location_settings,
                             skim_dict, destination_size_terms, chunk_size,
                             trace_hh_id):
    """
    School location model on school_location_sample annotated with mode_choice logsum
    to select a school_taz from sample alternatives
    """

    choosers = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()

    trace_label = 'school_location_simulate'
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    constants = config.get_model_constants(school_location_settings)

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]
    tracing.dump_df(DUMP, choosers, 'school_location_simulate', 'choosers')

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]
        alts_segment = school_location_sample[
            school_location_sample['school_type'] == school_type]

        # alternatives are pre-sampled and annotated with logsums and pick_count
        # but we have to merge additional alt columns into alt sample list
        alts_segment = \
            pd.merge(alts_segment, destination_size_terms,
                     left_on=alt_col_name, right_index=True, how="left")

        tracing.dump_df(DUMP, alts_segment, trace_label,
                        '%s_alternatives' % school_type)

        choices = interaction_sample_simulate(
            choosers_segment,
            alts_segment,
            spec=school_location_spec[[school_type]],
            choice_column=alt_col_name,
            skims=skims,
            locals_d=locals_d,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, school_type),
            trace_choice_name='school_location')

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    choices = choices.reindex(persons_merged.index).fillna(-1).astype(int)

    tracing.dump_df(DUMP, choices, trace_label, 'choices')

    tracing.print_summary('school_taz', choices, describe=True)

    inject.add_column("persons", "school_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        trace_columns = ['school_taz'] + inject.get_table('persons_school').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
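

# Illustrative sketch (not from the source): the reindex/fillna backfill used in
# school_location_simulate above. Persons who are not students get -1, meaning
# "no school location". Toy person ids and zone numbers.
import pandas as pd

all_persons = pd.Index([1, 2, 3, 4])
choices = pd.Series({1: 25, 3: 7})  # school_taz chosen for students only
choices = choices.reindex(all_persons).fillna(-1).astype(int)
# persons 2 and 4 -> -1
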
def joint_tour_composition(
        tours, households, persons,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).
    """
    trace_label = 'joint_tour_composition'

    model_settings = config.read_model_settings('joint_tour_composition.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_composition.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(trace_label, tours)
        return

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" % joint_tours.shape[0])

    # - run preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(
            df=households,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    joint_tours_merged = pd.merge(joint_tours, households,
                                  left_on='household_id', right_index=True, how='left')

    # - simple_simulate

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=joint_tours_merged,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='composition')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition', joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
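

# Illustrative sketch (not from the source): the left-merge above attaches
# household attributes to each joint tour keyed on household_id. Toy data;
# the num_adults column is hypothetical.
import pandas as pd

joint_tours = pd.DataFrame({'household_id': [10, 11, 10]}, index=[100, 101, 102])
households = pd.DataFrame({'num_adults': [2, 3]}, index=[10, 11])
joint_tours_merged = pd.merge(joint_tours, households,
                              left_on='household_id', right_index=True, how='left')
# tours 100 and 102 both pick up num_adults == 2 from household 10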