def atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    add logsum column to existing atwork_subtour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in atwork_subtour_destination_sample, and computing the logsum of all the utilities

    +-----------+--------------+----------------+------------+----------------+
    | person_id | dest_TAZ     | rand           | pick_count | logsum (added) |
    +===========+==============+================+============+================+
    | 23750     | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-----------+--------------+----------------+------------+----------------+
    | 23750     | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-----------+--------------+----------------+------------+----------------+
    | ...       |              |                |            |                |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-----------+--------------+----------------+------------+----------------+

    """

    trace_label = 'atwork_subtour_destination_logsums'

    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)

    # merge persons into tours
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    tour_purpose = 'atwork'
    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
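
For illustration, the "logsum of all the utilities" added for each sampled (person, dest_taz) pair is the log of the sum of the exponentiated mode utilities; a minimal sketch with made-up utility values, not the ActivitySim implementation:

import numpy as np

# hypothetical mode utilities for one sampled (person, dest_taz) pair, e.g. drive/transit/walk
mode_utilities = np.array([-1.2, -0.4, -2.5])

# mode choice logsum = ln(sum of exp(utilities)), the expected maximum utility across modes
mode_choice_logsum = np.log(np.exp(mode_utilities).sum())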
Example 2
def run_location_logsums(
        segment_name,
        persons_merged_df,
        skim_dict, skim_stack,
        location_sample_df,
        model_settings,
        chunk_size, trace_hh_id, trace_label):
    """
    add logsum column to existing location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in location_sample, and computing the logsum of all the utilities

    +-----------+--------------+----------------+------------+----------------+
    | PERID     | dest_TAZ     | rand           | pick_count | logsum (added) |
    +===========+==============+================+============+================+
    | 23750     | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-----------+--------------+----------------+------------+----------------+
    | 23750     | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-----------+--------------+----------------+------------+----------------+
    | ...       |              |                |            |                |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-----------+--------------+----------------+------------+----------------+
    """

    assert not location_sample_df.empty

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged_df = \
        logsum.filter_chooser_columns(persons_merged_df, logsum_settings, model_settings)

    logger.info("Running %s with %s rows" % (trace_label, len(location_sample_df.index)))

    choosers = location_sample_df.join(persons_merged_df, how='left')

    tour_purpose = model_settings['LOGSUM_TOUR_PURPOSE']
    if isinstance(tour_purpose, dict):
        tour_purpose = tour_purpose[segment_name]

    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved
    # logsums now does, since location_sample_df was on the left side of the join above
    location_sample_df['mode_choice_logsum'] = logsums

    return location_sample_df
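
A tiny sketch (made-up data) of the index-matching special case described in the comment above: assigning a series to a table whose index contains duplicates works, and preserves value order, when the series index exactly matches the table index:

import pandas as pd

sample = pd.DataFrame({'dest_TAZ': [14, 16, 12]}, index=[23750, 23750, 23751])  # duplicate index
logsums = pd.Series([1.86, 1.92, 2.41], index=[23750, 23750, 23751])            # identical index

# ok because the indexes match exactly; values are taken in order despite the duplicates
sample['mode_choice_logsum'] = logsums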
Example 3
def free_parking(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """

    """

    trace_label = 'free_parking'
    model_settings = config.read_model_settings('free_parking.yaml')

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_taz > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name='free_parking.csv')
    nest_spec = config.get_logit_model_settings(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work')

    persons = persons.to_frame()

    # only workers (workplace_taz > -1) were simulated, so reindex to all persons below
    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)
    persons['free_parking_at_work'] = choices.reindex(persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking', persons.free_parking_at_work, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
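
A small sketch (made-up person ids) of the reindex/fillna pattern above: choices exist only for the simulated persons (those with a workplace), and everyone else defaults to False:

import pandas as pd

choices = pd.Series([True, False], index=[1, 3])        # only persons 1 and 3 were simulated
all_persons = pd.Index([1, 2, 3, 4])

# persons 2 and 4 were not simulated, so their NaNs become 0 and then False
free_parking_at_work = choices.reindex(all_persons).fillna(0).astype(bool)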
def atwork_subtour_destination_sample(
        tours,
        persons_merged,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_location_sample'
    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination_sample.csv')

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("Running atwork_subtour_location_sample with %d tours", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('workplace_taz', 'TAZ')

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=model_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = choosers.person_id

    return choices
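
To illustrate what the wrapped skims make available to @ expressions in the sample spec, here is a self-contained sketch with toy data (not the skim_dict.wrap implementation, and the 'DIST' skim name is just an example): for each merged chooser/alternative row it yields the skim value from the chooser's workplace_taz to the alternative TAZ.

import numpy as np
import pandas as pd

# toy 'DIST' skim: rows are origin TAZs, columns are destination TAZs
dist = pd.DataFrame(np.arange(9.0).reshape(3, 3), index=[1, 2, 3], columns=[1, 2, 3])

# interaction rows: each chooser's workplace_taz paired with a candidate alternative TAZ
interaction = pd.DataFrame({'workplace_taz': [1, 1, 3], 'TAZ': [2, 3, 1]})

# one skim value per interaction row - roughly what "@skims['DIST']" would evaluate to
dist_values = dist.values[
    dist.index.get_indexer(interaction['workplace_taz']),
    dist.columns.get_indexer(interaction['TAZ'])
]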
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):

    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings('non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    choices = tour_destination.run_tour_destination(
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size, trace_hh_id, trace_label)

    non_mandatory_tours['destination'] = choices

    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
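
A minimal sketch (toy data) of the effect of the assign_in_place call above, expressed with plain pandas: values from the smaller frame overwrite the matching rows and columns of the full tours table, leaving the other rows untouched.

import pandas as pd

tours = pd.DataFrame({'destination': [-1, -1, -1]}, index=[10, 11, 12])
chosen = pd.DataFrame({'destination': [42]}, index=[11])       # only the non-mandatory tours

# roughly equivalent to assign_in_place(tours, chosen): only tour 11 is updated
tours.loc[chosen.index, chosen.columns] = chosen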
Example 6
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips).
    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id,
        trace_label)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:
        trips_df = cleanup_failed_trips(trips_df)
    elif trips_df.failed.any():
        logger.warning("%s keeping %s sidelined failed trips" %
                       (trace_label, trips_df.failed.sum()))

    pipeline.replace_table("trips", trips_df)

    print("trips_df\n", trips_df.shape)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example 7
def initialize_households():

    trace_label = 'initialize_households'

    model_settings = config.read_model_settings('initialize_households.yaml', mandatory=True)
    annotate_tables(model_settings, trace_label)

    # - initialize shadow_pricing size tables after annotating household and person tables
    # since these are scaled to model size, they have to be created while single-process
    shadow_pricing.add_size_tables()

    # - preload person_windows
    t0 = tracing.print_elapsed_time()
    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows", t0, debug=True)
Example 8
def initialize_landuse():

    trace_label = 'initialize_landuse'

    model_settings = config.read_model_settings('initialize_landuse.yaml', mandatory=True)

    annotate_tables(model_settings, trace_label)

    # create accessibility
    land_use = pipeline.get_table('land_use')

    accessibility_df = pd.DataFrame(index=land_use.index)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
Example 9
def workplace_location(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """
    workplace location choice model

    iterate_location_choice adds location choice column and annotations to persons table
    """

    trace_label = 'workplace_location'
    model_settings = config.read_model_settings('workplace_location.yaml')

    iterate_location_choice(
        model_settings,
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor, trace_label
    )
Example 10
def get_shadow_pricing_info():
    """
    return dict with info about dtype and shapes of desired and modeled size tables

    block shape is (num_zones, num_segments + 1)


    Returns
    -------
    shadow_pricing_info: dict
        dtype: <sp_dtype>,
        block_shapes: dict {<model_selector>: <block_shape>}
    """

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    shadow_pricing_models = shadow_settings['shadow_pricing_models']

    blocks = OrderedDict()
    for model_selector in shadow_pricing_models:

        sp_rows = len(land_use)
        sp_cols = len(size_terms[size_terms.model_selector == model_selector])

        # extra tally column for TALLY_CHECKIN and TALLY_CHECKOUT semaphores
        blocks[block_name(model_selector)] = (sp_rows, sp_cols + 1)

    sp_dtype = np.int64

    shadow_pricing_info = {
        'dtype': sp_dtype,
        'block_shapes': blocks,
    }

    for k in shadow_pricing_info:
        logger.debug("shadow_pricing_info %s: %s" % (k, shadow_pricing_info.get(k)))

    return shadow_pricing_info
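
As a hedged sketch of how a block shape and dtype from shadow_pricing_info might back a shared buffer in multiprocessing mode (toy numbers; this is not the actual allocation code):

import ctypes
import multiprocessing

import numpy as np

rows, cols = 25, 3 + 1                           # (num_zones, num_segments + 1 tally column)
raw = multiprocessing.RawArray(ctypes.c_int64, rows * cols)

# numpy view onto the shared buffer; a multiprocessing.Lock coordinates writes to it
shared_block = np.frombuffer(raw, dtype=np.int64).reshape(rows, cols)
shared_lock = multiprocessing.Lock()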
Example 11
def run_destination_logsums(
        tour_purpose,
        persons_merged,
        destination_sample,
        model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, trace_label):
    """
    add logsum column to existing tour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in destination_sample, and computing the logsum of all the utilities
    """

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)

    # merge persons into tours
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
Example 12
def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label, len(households_merged))

    model_spec = simulate.read_model_spec(file_name='auto_ownership.csv')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=households_merged.to_frame(),
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership')

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label='auto_ownership',
                         warn_if_empty=True)
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings(
        'trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info(
                'trip_destination has already been run. Rerunning failed trips'
            )
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
                trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' %
                        trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." %
                        trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mate trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(
            trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example 14
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings = config.read_model_settings(
        'mandatory_tour_scheduling.yaml')
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [
        c for c in model_columns if c not in logsum_columns
    ]
    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    # - add primary_purpose column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, and univ primary_purposes)
    # for simplicity in managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    segment_col = 'primary_purpose'
    if segment_col not in mandatory_tours:

        is_university_tour = \
            (mandatory_tours.tour_type == 'school') & \
            reindex(persons_merged.is_university, mandatory_tours.person_id)

        mandatory_tours['primary_purpose'] = \
            mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # - spec dict segmented by primary_purpose
    specs = model_settings.get('SPEC', [])
    segment_specs = {
        segment: simulate.read_model_spec(file_name=spec)
        for segment, spec in specs.items()
    }

    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    tdd_choices, timetable = vts.vectorize_tour_scheduling(
        mandatory_tours,
        persons_merged,
        tdd_alts,
        spec=segment_specs,
        segment_col=segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def atwork_subtour_destination(tours, persons_merged, skim_dict, skim_stack,
                               land_use, size_terms, chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_destination'
    model_settings_file_name = 'atwork_subtour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    destination_column_name = 'destination'
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']
    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    estimator = estimation.manager.begin_estimation(
        'atwork_subtour_destination')
    if estimator:
        estimator.write_coefficients(
            simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms',
                              append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse',
                              append=False)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    destination_size_terms = tour_destination_size_terms(
        land_use, size_terms, 'atwork')

    destination_sample_df = atwork_subtour_destination_sample(
        subtours,
        persons_merged,
        model_settings,
        skim_dict,
        destination_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'sample'))

    destination_sample_df = atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample_df,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'logsums'))

    choices_df = atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample_df,
        want_logsums,
        model_settings,
        skim_dict,
        destination_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'simulate'))

    if estimator:
        estimator.write_choices(choices_df['choice'])
        choices_df['choice'] = estimator.get_survey_values(
            choices_df['choice'], 'tours', 'destination')
        estimator.write_override_choices(choices_df['choice'])
        estimator.end_estimation()

    subtours[destination_column_name] = choices_df['choice']
    assign_in_place(tours, subtours[[destination_column_name]])

    if want_logsums:
        subtours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, subtours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        # FIXME - sample_table
        assert len(destination_sample_df.index.unique()) == len(choices_df)
        destination_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'],
                                        append=True,
                                        inplace=True)
        pipeline.extend_table(sample_table_name, destination_sample_df)

    tracing.print_summary(destination_column_name,
                          subtours[destination_column_name],
                          describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
Example 16
    def __init__(self,
                 model_settings,
                 num_processes,
                 shared_data=None,
                 shared_data_lock=None):
        """

        Presence of shared_data is used as a flag for multiprocessing
        If we are multiprocessing, shared_data should be a multiprocessing.RawArray buffer
        to aggregate modeled_size across all sub-processes, and shared_data_lock should be
        a multiprocessing.Lock object to coordinate access to that buffer.

        Optionally load saved shadow_prices from data_dir if config setting use_shadow_pricing
        and shadow_setting LOAD_SAVED_SHADOW_PRICES are both True

        Parameters
        ----------
        model_settings : dict
        shared_data : numpy array wrapping multiprocessing.RawArray, or None (if single process)
        shared_data_lock : multiprocessing.Lock, or None (if single process)
        """

        self.num_processes = num_processes
        self.use_shadow_pricing = bool(config.setting('use_shadow_pricing'))
        self.saved_shadow_price_file_path = None  # set by read_saved_shadow_prices if loaded

        self.model_selector = model_settings['MODEL_SELECTOR']

        full_model_run = config.setting('households_sample_size') == 0
        if self.use_shadow_pricing and not full_model_run:
            logger.warning(
                "deprecated combination of use_shadow_pricing and not full_model_run"
            )

        if (self.num_processes > 1) and not config.setting('fail_fast'):
            # if we are multiprocessing, then fail_fast should be true or we will wait forever for failed processes
            logger.warning(
                "deprecated combination of multiprocessing and not fail_fast")
            raise RuntimeError(
                "Shadow pricing requires fail_fast setting in multiprocessing mode"
            )

        self.segment_ids = model_settings['SEGMENT_IDS']

        # - modeled_size (set by call to set_choices/synchronize_choices)
        self.modeled_size = None

        if self.use_shadow_pricing:
            self.shadow_settings = config.read_model_settings(
                'shadow_pricing.yaml')

            for k in self.shadow_settings:
                logger.debug("shadow_settings %s: %s" %
                             (k, self.shadow_settings.get(k)))

        # - destination_size_table (desired_size)
        self.desired_size = inject.get_table(
            size_table_name(self.model_selector)).to_frame()
        self.desired_size = self.desired_size.sort_index()

        assert self.desired_size.index.is_monotonic_increasing, \
            f"{size_table_name(self.model_selector)} not is_monotonic_increasing"

        # - shared_data
        if shared_data is not None:
            assert shared_data.shape[0] == self.desired_size.shape[0]
            assert shared_data.shape[1] == self.desired_size.shape[1] + 1  # tally column
            assert shared_data_lock is not None
        self.shared_data = shared_data
        self.shared_data_lock = shared_data_lock

        # - load saved shadow_prices (if available) and set max_iterations accordingly
        if self.use_shadow_pricing:
            self.shadow_prices = None
            self.shadow_price_method = self.shadow_settings[
                'SHADOW_PRICE_METHOD']
            assert self.shadow_price_method in ['daysim', 'ctramp']

            if self.shadow_settings['LOAD_SAVED_SHADOW_PRICES']:
                # read_saved_shadow_prices logs error and returns None if file not found
                self.shadow_prices = self.read_saved_shadow_prices(
                    model_settings)

            if self.shadow_prices is None:
                self.max_iterations = self.shadow_settings.get(
                    'MAX_ITERATIONS', 5)
            else:
                self.max_iterations = self.shadow_settings.get(
                    'MAX_ITERATIONS_SAVED', 1)

            # initial_shadow_price if we did not load
            if self.shadow_prices is None:
                # initial value depends on method
                initial_shadow_price = 1.0 if self.shadow_price_method == 'ctramp' else 0.0
                self.shadow_prices = \
                    pd.DataFrame(data=initial_shadow_price,
                                 columns=self.desired_size.columns,
                                 index=self.desired_size.index)
        else:
            self.max_iterations = 1

        self.num_fail = pd.DataFrame(index=self.desired_size.columns)
        self.max_abs_diff = pd.DataFrame(index=self.desired_size.columns)
        self.max_rel_diff = pd.DataFrame(index=self.desired_size.columns)
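
A hedged sketch (not the actual synchronize_choices code) of the aggregation the docstring describes: each sub-process adds its local modeled_size counts into the shared buffer under the lock; the exact placement of the check-in tally cell is an assumption here.

def add_local_modeled_size(shared_data, shared_data_lock, local_modeled_size):
    # shared_data: numpy array wrapping the multiprocessing.RawArray, shape (zones, segments + 1)
    with shared_data_lock:
        shared_data[:, :-1] += local_modeled_size   # accumulate per-zone, per-segment counts
        shared_data[0, -1] += 1                     # bump a check-in tally (cell choice assumed)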
Example 17
def run_trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id,
        trace_label):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------

    """

    model_settings = config.read_model_settings('trip_destination.yaml')
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(int)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(int)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    if 'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS' in model_settings:
        redundant_cols = model_settings['REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS']
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skims = wrap_skims(model_settings)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by TAZ and purpose
    # e.g. size_terms.get(df.dest_taz, df.purpose)
    # returns a series of size_terms for each chooser's dest_taz and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just TAZ index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST']

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=config.get_model_constants(model_settings),
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    size_term_matrix, skims,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)

            destinations = pd.concat(choices_list)

            failed_trip_ids = nth_trips.index.difference(destinations.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            # - assign choices to these trips' destinations and to the next trips' origins
            assign_in_place(trips, destinations.to_frame('destination'))
            destinations.index = nth_trips.next_trip_id.reindex(destinations.index)
            assign_in_place(trips, destinations.to_frame('origin'))

    del trips['next_trip_id']

    return trips
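
A self-contained sketch (toy data) of the DataFrameMatrix-style lookup described in the size_term_matrix comment above: one size-term value per chooser, selected by that chooser's dest_taz row and purpose column.

import pandas as pd

size_terms = pd.DataFrame({'work': [10.0, 20.0], 'shopping': [5.0, 8.0]}, index=[1, 2])  # by TAZ
choosers = pd.DataFrame({'dest_taz': [2, 1, 2], 'purpose': ['work', 'shopping', 'shopping']})

# roughly what size_term_matrix.get(df.dest_taz, df.purpose) returns: array([20., 5., 8.])
values = size_terms.values[
    size_terms.index.get_indexer(choosers['dest_taz']),
    size_terms.columns.get_indexer(choosers['purpose'])
]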
def transit_pass_subsidy(persons_merged, persons, chunk_size, trace_hh_id):
    """
    Transit pass subsidy model.
    """

    trace_label = 'transit_pass_subsidy'
    model_settings_file_name = 'transit_pass_subsidy.yaml'

    choosers = persons_merged.to_frame()
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('transit_pass_subsidy')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='transit_pass_subsidy',
        estimator=estimator)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'transit_pass_subsidy')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['transit_pass_subsidy'] = choices.reindex(persons.index)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('transit_pass_subsidy',
                          persons.transit_pass_subsidy,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
def atwork_subtour_destination_logsums(persons_merged,
                                       land_use,
                                       skim_dict, skim_stack,
                                       atwork_subtour_destination_sample,
                                       configs_dir,
                                       chunk_size,
                                       trace_hh_id):
    """
    add logsum column to existing atwork_subtour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in atwork_subtour_destination_sample, and computing the logsum of all the utilities

    +-------+--------------+----------------+------------+----------------+
    | PERID | dest_TAZ     | rand           | pick_count | logsum (added) |
    +=======+==============+================+============+================+
    | 23750 | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-------+--------------+----------------+------------+----------------+
    | 23750 | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-------+--------------+----------------+------------+----------------+
    | ...   |              |                |            |                |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-------+--------------+----------------+------------+----------------+

    """

    trace_label = 'atwork_subtour_destination_logsums'
    model_settings = inject.get_injectable('atwork_subtour_destination_settings')

    logsums_spec = mode_choice_logsums_spec(configs_dir, 'work')

    alt_col_name = model_settings["ALT_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir, 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    atwork_subtour_destination_sample = atwork_subtour_destination_sample.to_frame()

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    # merge persons into tours
    choosers = pd.merge(atwork_subtour_destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    choosers['in_period'] = skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = skim_time_period_label(model_settings['OUT_PERIOD'])

    # FIXME - should do this in expression file?
    choosers['dest_topology'] = reindex(land_use.TOPOLOGY, choosers[alt_col_name])
    choosers['dest_density_index'] = reindex(land_use.density_index, choosers[alt_col_name])

    logger.info("Running atwork_subtour_destination_logsums with %s rows" % len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = compute_logsums(
        choosers, logsums_spec, logsum_settings,
        skim_dict, skim_stack, chooser_col_name, alt_col_name, chunk_size, trace_hh_id, trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved. logsums does have a
    # matching index, since atwork_subtour_destination_sample was on the left side of the merge above
    inject.add_column("atwork_subtour_destination_sample", "mode_choice_logsum", logsums)
Example 20
def add_size_tables():
    """
    inject tour_destination_size_terms tables for each model_selector (e.g. school, workplace)

    Size tables are pandas dataframes with location counts (tour_destination_size_terms)
    for each model_selector, by zone and segment.

    if using shadow pricing, we scale size_table counts to sample population
    (in which case, they have to be created while single-process)

    Scaling is problematic as it breaks household result replicability across sample sizes
    It also changes the magnitude of the size terms so if they are used as utilities in
    expression files, their importance will diminish relative to other utilities as the sample
    size decreases.

    Scaling makes most sense for a full sample in conjunction with shadow pricing, where
    shadow prices can be adjusted iteratively to bring modelled counts into line with desired
    (size table) counts.
    """

    use_shadow_pricing = bool(config.setting('use_shadow_pricing'))

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')
    shadow_pricing_models = shadow_settings['shadow_pricing_models']

    # probably ought not scale if not shadow_pricing (breaks partial sample replicability)
    # but this allows compatibility with existing CTRAMP behavior...
    scale_size_table = shadow_settings.get('SCALE_SIZE_TABLE', False)

    if shadow_pricing_models is None:
        logger.warning('shadow_pricing_models list not found in shadow_pricing settings')
        return

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    # since these are scaled to model size, they have to be created while single-process

    for model_selector, model_name in iteritems(shadow_pricing_models):

        model_settings = config.read_model_settings(model_name)

        assert model_selector == model_settings['MODEL_SELECTOR']

        segment_ids = model_settings['SEGMENT_IDS']
        chooser_table_name = model_settings['CHOOSER_TABLE_NAME']
        chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

        choosers_df = inject.get_table(chooser_table_name).to_frame()
        if 'CHOOSER_FILTER_COLUMN_NAME' in model_settings:
            choosers_df = \
                choosers_df[choosers_df[model_settings['CHOOSER_FILTER_COLUMN_NAME']] != 0]

        # - raw_desired_size
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        raw_size = tour_destination_size_terms(land_use, size_terms, model_selector)
        assert set(raw_size.columns) == set(segment_ids.keys())

        if use_shadow_pricing or scale_size_table:

            inject.add_table('raw_' + size_table_name(model_selector), raw_size)

            # - scale size_table counts to sample population
            # scaled_size = zone_size * (total_segment_modeled / total_segment_desired)

            # segment scale factor (modeled / desired) keyed by segment_name
            segment_scale_factors = {}
            for c in raw_size:
                # total desired destination choices for this segment, summed over all zones
                segment_desired_size = raw_size[c].astype(np.float64).sum()

                # number of synthetic population choosers in segment
                segment_chooser_count = \
                    (choosers_df[chooser_segment_column] == segment_ids[c]).sum()

                segment_scale_factors[c] = \
                    segment_chooser_count / np.maximum(segment_desired_size, 1)

                logger.info("add_desired_size_tables %s segment %s "
                            "desired %s modeled %s scale_factor %s" %
                            (chooser_table_name, c,
                             segment_desired_size,
                             segment_chooser_count,
                             segment_scale_factors[c]))

            # FIXME - should we be rounding?
            scaled_size = (raw_size * segment_scale_factors).round()
        else:
            scaled_size = raw_size

        inject.add_table(size_table_name(model_selector), scaled_size)
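
A worked toy example (made-up numbers) of the scaling above: if a segment's raw size terms total 10,000 desired destination slots region-wide but the sampled synthetic population contains only 500 choosers in that segment, every zone's size term for that segment is scaled by 500 / 10,000 = 0.05.

segment_desired_size = 10000.0       # sum of the segment's raw size terms over all zones
segment_chooser_count = 500          # choosers in this segment in the (sampled) population

scale_factor = segment_chooser_count / max(segment_desired_size, 1)   # 0.05
scaled_zone_size = round(240 * scale_factor)                          # raw zone size 240 -> 12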
Example 21
def write_trip_matrices(trips, network_los):
    """
    Write trip matrices step.

    Adds boolean columns to the local trips table via annotation expressions,
    then aggregates trip counts and writes OD matrices to OMX.  Saves the annotated
    trips table to the pipeline if desired.

    Writes taz trip tables for one- and two-zone systems.  Writes taz and tap
    trip tables for the three-zone system.  Add ``is_tap:True`` to the settings file
    to identify an output matrix as tap-level trips as opposed to taz-level trips.

    For the one-zone system, uses the land use table for the set of possible tazs.
    For the two-zone system, uses the taz skim zone names for the set of possible tazs.
    For the three-zone system, uses the taz skim zone names for the set of possible tazs
    and the tap skim zone names for the set of possible taps.

    """

    model_settings = config.read_model_settings('write_trip_matrices.yaml')
    trips_df = annotate_trips(trips, network_los, model_settings)

    if bool(model_settings.get('SAVE_TRIPS_TABLE')):
        pipeline.replace_table('trips', trips_df)

    if 'parking_location' in config.setting('models'):
        parking_settings = config.read_model_settings('parking_location_choice.yaml')
        parking_taz_col_name = parking_settings['ALT_DEST_COL_NAME']
        if parking_taz_col_name in trips_df:
            trips_df.loc[trips_df[parking_taz_col_name] > 0, 'destination'] = trips_df[parking_taz_col_name]
        # Also need to address the return trip

    # write matrices by zone system type
    if network_los.zone_system == los.ONE_ZONE:  # taz trips written to taz matrices
        logger.info('aggregating trips one zone...')
        aggregate_trips = trips_df.groupby(['origin', 'destination'], sort=False).sum()

        # use the average household weight for all trips in the origin destination pair
        hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')
        aggregate_weight = trips_df[['origin', 'destination', hh_weight_col]].groupby(['origin', 'destination'],
                                                                                      sort=False).mean()
        aggregate_trips[hh_weight_col] = aggregate_weight[hh_weight_col]

        orig_vals = aggregate_trips.index.get_level_values('origin')
        dest_vals = aggregate_trips.index.get_level_values('destination')

        # use the land use table for the set of possible tazs
        zone_index = pipeline.get_table('land_use').index
        assert all(zone in zone_index for zone in orig_vals)
        assert all(zone in zone_index for zone in dest_vals)

        _, orig_index = zone_index.reindex(orig_vals)
        _, dest_index = zone_index.reindex(dest_vals)

        write_matrices(aggregate_trips, zone_index, orig_index, dest_index, model_settings)

    elif network_los.zone_system == los.TWO_ZONE:  # maz trips written to taz matrices
        logger.info('aggregating trips two zone...')
        trips_df["otaz"] = pipeline.get_table('land_use').reindex(trips_df['origin']).TAZ.tolist()
        trips_df["dtaz"] = pipeline.get_table('land_use').reindex(trips_df['destination']).TAZ.tolist()
        aggregate_trips = trips_df.groupby(['otaz', 'dtaz'], sort=False).sum()

        # use the average household weight for all trips in the origin destination pair
        hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')
        aggregate_weight = trips_df[['otaz', 'dtaz', hh_weight_col]].groupby(['otaz', 'dtaz'], sort=False).mean()
        aggregate_trips[hh_weight_col] = aggregate_weight[hh_weight_col]

        orig_vals = aggregate_trips.index.get_level_values('otaz')
        dest_vals = aggregate_trips.index.get_level_values('dtaz')

        # use the taz skim zone names for the set of possible tazs
        zone_index = pd.Index(network_los.skims_info['taz'].offset_map,
                              name=network_los.skims_info['taz'].offset_map_name)
        assert all(zone in zone_index for zone in orig_vals)
        assert all(zone in zone_index for zone in dest_vals)

        _, orig_index = zone_index.reindex(orig_vals)
        _, dest_index = zone_index.reindex(dest_vals)

        write_matrices(aggregate_trips, zone_index, orig_index, dest_index, model_settings)

    elif network_los.zone_system == los.THREE_ZONE:  # maz trips written to taz and tap matrices

        logger.info('aggregating trips three zone taz...')
        trips_df["otaz"] = pipeline.get_table('land_use').reindex(trips_df['origin']).TAZ.tolist()
        trips_df["dtaz"] = pipeline.get_table('land_use').reindex(trips_df['destination']).TAZ.tolist()
        aggregate_trips = trips_df.groupby(['otaz', 'dtaz'], sort=False).sum()

        # use the average household weight for all trips in the origin destination pair
        hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')
        aggregate_weight = trips_df[['otaz', 'dtaz', hh_weight_col]].groupby(['otaz', 'dtaz'], sort=False).mean()
        aggregate_trips[hh_weight_col] = aggregate_weight[hh_weight_col]

        orig_vals = aggregate_trips.index.get_level_values('otaz')
        dest_vals = aggregate_trips.index.get_level_values('dtaz')

        # use the taz skim zone names for the set of possible tazs
        zone_index = pd.Index(network_los.skims_info['taz'].offset_map,
                              name=network_los.skims_info['taz'].offset_map_name)
        assert all(zone in zone_index for zone in orig_vals)
        assert all(zone in zone_index for zone in dest_vals)

        _, orig_index = zone_index.reindex(orig_vals)
        _, dest_index = zone_index.reindex(dest_vals)

        write_matrices(aggregate_trips, zone_index, orig_index, dest_index, model_settings)

        logger.info('aggregating trips three zone tap...')
        aggregate_trips = trips_df.groupby(['btap', 'atap'], sort=False).sum()

        # use the average household weight for all trips in the origin destination pair
        hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')
        aggregate_weight = trips_df[['btap', 'atap', hh_weight_col]].groupby(['btap', 'atap'], sort=False).mean()
        aggregate_trips[hh_weight_col] = aggregate_weight[hh_weight_col]

        orig_vals = aggregate_trips.index.get_level_values('btap')
        dest_vals = aggregate_trips.index.get_level_values('atap')

        # use the tap skim zone names for the set of possible taps
        zone_index = pd.Index(network_los.skims_info['tap'].offset_map,
                              name=network_los.skims_info['tap'].offset_map_name)
        assert all(zone in zone_index for zone in orig_vals)
        assert all(zone in zone_index for zone in dest_vals)

        _, orig_index = zone_index.reindex(orig_vals)
        _, dest_index = zone_index.reindex(dest_vals)

        write_matrices(aggregate_trips, zone_index, orig_index, dest_index, model_settings, True)
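
# The aggregation above follows a simple pandas pattern: trip columns are summed per
# (otaz, dtaz) pair, while the household expansion weight is averaged so it is not
# inflated by the number of trips in the pair. A minimal standalone sketch of that
# pattern; the column names ('trips', 'sample_rate') are illustrative, not the model's.
import pandas as pd

example_trips = pd.DataFrame({
    'otaz':        [1, 1, 2],
    'dtaz':        [2, 2, 1],
    'trips':       [1, 1, 1],
    'sample_rate': [0.5, 0.25, 0.5],
})

example_aggregate = example_trips.groupby(['otaz', 'dtaz'], sort=False).sum()
example_aggregate['sample_rate'] = \
    example_trips.groupby(['otaz', 'dtaz'], sort=False)['sample_rate'].mean()

print(example_aggregate)
#            trips  sample_rate
# otaz dtaz
# 1    2         2        0.375
# 2    1         1        0.500
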
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
Example 23
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec has a zero probability for
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) CHOOSE_MOST_INITIAL
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    For now we are choosing among these approaches with a manifest constant, but this could
    be made a model setting...

    """
    trace_label = "trip_scheduling"

    model_settings = config.read_model_settings('trip_scheduling.yaml')
    assert 'DEPART_ALT_BASE' in model_settings

    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = \
        reindex(pd.Series(list(range(tours.shape[0])), tours.index), trips_df.tour_id)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []
    i = 0
    while (i < max_iterations) and not trips_df.empty:

        i += 1
        last_iteration = (i == max_iterations)

        trace_label_i = tracing.extend_trace_label(trace_label, "i%s" % i)
        logger.info("%s scheduling %s trips", trace_label_i, trips_df.shape[0])

        choices = \
            run_trip_scheduling(
                trips_df,
                tours,
                probs_spec,
                model_settings,
                last_iteration=last_iteration,
                trace_hh_id=trace_hh_id,
                chunk_size=chunk_size,
                trace_label=trace_label_i)

        # boolean series of trips whose individual trip scheduling failed
        failed = choices.reindex(trips_df.index).isnull()
        logger.info("%s %s failed", trace_label_i, failed.sum())

        if not last_iteration:
            # boolean series of trips whose leg scheduling failed
            failed_cohorts = failed_trip_cohorts(trips_df, failed)
            trips_df = trips_df[failed_cohorts]
            choices = choices[~failed_cohorts]

        choices_list.append(choices)

    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)
    if choices.isnull().any():
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations" %
            (choices.isnull().sum(), trips_df.shape[0], i))

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
Example 24
def trip_mode_choice(trips, tours_merged, network_los, chunk_size,
                     trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in the trip_mode_choice_coefficients.csv coefficient file).

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips_df,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(
        trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({'ORIGIN': orig_col, 'DESTINATION': dest_col})

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col,
                                               dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col,
                                               dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col,
                                           dest_key=dest_col,
                                           tod_key='trip_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            # 'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby(
            'primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(
            trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" % (
            primary_purpose,
            len(trips_segment.index),
        ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(
            model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        expressions.annotate_preprocessors(trips_segment, locals_dict, skims,
                                           model_settings, segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients,
                                            estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[
                        dest_col] = np.nan if pd.api.types.is_numeric_dtype(
                            skim_cache[c]) else ''
                choices_df[dest_col].where(
                    choices_df[mode_column_name] != mode,
                    skim_cache[c],
                    inplace=True)

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(
            choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()

    # update trips table with choices (and optionally logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes',
                          trips_merged.tour_mode,
                          value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name],
                          value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(
                             trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
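
# The cached-column write-back above relies on pandas Series.where, which keeps the existing
# value where the condition holds and substitutes the alternative where it does not. A toy
# sketch of that idea (assigning the result back instead of using inplace); the mode name,
# column name, and tap values are made up, not the model's actual tvpb cache columns.
import numpy as np
import pandas as pd

example_choices = pd.DataFrame({'trip_mode': ['WALK_TRANSIT', 'DRIVE', 'WALK_TRANSIT']})
cached_btap = pd.Series([101, 102, 103], index=example_choices.index)

col = 'od_btap'
if col not in example_choices:
    example_choices[col] = np.nan

# keep NaN where the chosen mode is not WALK_TRANSIT, fill the cached tap where it is
example_choices[col] = example_choices[col].where(
    example_choices['trip_mode'] != 'WALK_TRANSIT', cached_btap)

print(example_choices)
#       trip_mode  od_btap
# 0  WALK_TRANSIT    101.0
# 1         DRIVE      NaN
# 2  WALK_TRANSIT    103.0
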
Example 25
def tour_mode_choice_simulate(tours, persons_merged, network_los, chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'

    primary_tours = tours.to_frame()
    assert not (primary_tours.tour_category == 'atwork').any()

    logger.info("Running %s with %d tours" %
                (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type,
                          value_counts=True)

    persons_merged = persons_merged.to_frame()
    primary_tours_merged = pd.merge(primary_tours,
                                    persons_merged,
                                    left_on='person_id',
                                    right_index=True,
                                    how='left',
                                    suffixes=('', '_r'))

    constants = {}
    # model_constants can appear in expressions
    constants.update(config.get_model_constants(model_settings))

    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    orig_col_name = 'home_zone_id'
    dest_col_name = 'destination'

    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims":
        odr_skim_stack_wrapper,  # dot return skims for e.g. TNC bridge return fare
        "dor_skims":
        dor_skim_stack_wrapper,  # odt return skims for e.g. TNC bridge return fare
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?

        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name,
                                           dest_key=dest_col_name,
                                           tod_key='out_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name,
                                           dest_key=orig_col_name,
                                           tod_key='in_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('tour_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # (run_tour_mode_choice_simulate writes choosers post-annotation)

    # FIXME should normalize handling of tour_type and tour_purpose
    # mtctm1 school tour_type includes univ, which has different coefficients from elementary and HS
    # we should either add this column when tours created or add univ to tour_types
    not_university = (primary_tours_merged.tour_type !=
                      'school') | ~primary_tours_merged.is_university
    primary_tours_merged['tour_purpose'] = \
        primary_tours_merged.tour_type.where(not_university, 'univ')

    choices_list = []
    for tour_purpose, tours_segment in primary_tours_merged.groupby(
            'tour_purpose'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" % (
            tour_purpose,
            len(tours_segment.index),
        ))

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(tour_purpose)
            tvpb_logsum_dot.extend_trace_label(tour_purpose)

        # name index so tracing knows how to slice
        assert tours_segment.index.name == 'tour_id'

        choices_df = run_tour_mode_choice_simulate(
            tours_segment,
            tour_purpose,
            model_settings,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            network_los=network_los,
            skims=skims,
            constants=constants,
            estimator=estimator,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_purpose),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices_df' %
                              tour_purpose,
                              choices_df.tour_mode,
                              value_counts=True)

        choices_list.append(choices_df)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_types in tvpb_mode_path_types.items():

            for direction, skim in zip(['od', 'do'],
                                       [tvpb_logsum_odt, tvpb_logsum_dot]):

                path_type = path_types[direction]
                skim_cache = skim.cache[path_type]

                print(
                    f"mode {mode} direction {direction} path_type {path_type}")

                for c in skim_cache:

                    dest_col = f'{direction}_{c}'

                    if dest_col not in choices_df:
                        choices_df[
                            dest_col] = np.nan if pd.api.types.is_numeric_dtype(
                                skim_cache[c]) else ''
                    choices_df[dest_col].where(choices_df.tour_mode != mode,
                                               skim_cache[c],
                                               inplace=True)

    if estimator:
        estimator.write_choices(choices_df.tour_mode)
        choices_df.tour_mode = estimator.get_survey_values(
            choices_df.tour_mode, 'tours', 'tour_mode')
        estimator.write_override_choices(choices_df.tour_mode)
        estimator.end_estimation()

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices_df.tour_mode,
                          value_counts=True)

    # so we can trace with annotations
    assign_in_place(primary_tours, choices_df)

    # update tours table with mode choice (and optionally logsums)
    all_tours = tours.to_frame()
    assign_in_place(all_tours, choices_df)

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(
                             trace_label, mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
def atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    trace_label = 'atwork_subtour_destination_simulate'

    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination.csv')

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged,
                        left_on='person_id', right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge destination_size_terms columns into alt sample list
    alternatives = \
        pd.merge(destination_sample, destination_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')

    return choices
def atwork_subtour_destination_settings(configs_dir):
    return config.read_model_settings(configs_dir, 'atwork_subtour_destination.yaml')
Example 28
def joint_tour_composition(
        tours, households, persons,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).
    """
    trace_label = 'joint_tour_composition'

    model_settings = config.read_model_settings('joint_tour_composition.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_composition.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(trace_label, tours)
        return

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" % joint_tours.shape[0])

    # - run preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(
            df=households,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    joint_tours_merged = pd.merge(joint_tours, households,
                                  left_on='household_id', right_index=True, how='left')

    # - simple_simulate

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=joint_tours_merged,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='composition')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition', joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
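
# simulate.simple_simulate returns the position of the chosen alternative, and the example
# above maps those positions back to the spec's column names. A toy illustration of that
# mapping; the alternative names and the tour ids below are made up for the sketch.
import pandas as pd

spec_columns = pd.Index(['adults', 'children', 'mixed'])
positional_choices = pd.Series([0, 2, 1], index=[101, 102, 103])  # indexed by tour_id

named_choices = pd.Series(spec_columns[positional_choices.values],
                          index=positional_choices.index)
print(named_choices)
# 101      adults
# 102       mixed
# 103    children
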
Example 29
def accessibility_settings(configs_dir):
    return config.read_model_settings(configs_dir, 'accessibility.yaml')
Example 30
def tour_mode_choice_simulate(tours, persons_merged,
                              skim_dict, skim_stack,
                              chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type, value_counts=True)

    primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id',
                                    right_index=True, how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices_list = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        choices = run_tour_mode_choice_simulate(
            segment,
            spec, tour_type, model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    # uncomment to save annotations to table
    # assign_in_place(all_tours, annotations)
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
Example 31
def run_trip_purpose(
        trips_df,
        chunk_size,
        trace_hh_id,
        trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    Each intermediate stop on a tour (i.e. each trip other than the last outbound or inbound trip)
    is assigned a purpose based on an observed frequency distribution.

    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    model_settings = config.read_model_settings('trip_purpose.yaml')
    probs_spec = trip_purpose_probs()

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'Work', 'Home'), index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(
            df=trips_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    rows_per_chunk, effective_chunk_size = \
        trip_purpose_rpc(chunk_size, trips_df, probs_spec, trace_label=trace_label)

    for i, num_chunks, trips_chunk in chunk.chunked_choosers(trips_df, rows_per_chunk):

        logger.info("Running chunk %s of %s size %d", i, num_chunks, len(trips_chunk))

        chunk_trace_label = tracing.extend_trace_label(trace_label, 'chunk_%s' % i) \
            if num_chunks > 1 else trace_label

        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)

        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            trace_hh_id,
            trace_label=chunk_trace_label)

        chunk.log_close(chunk_trace_label)

        result_list.append(choices)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
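
# choose_intermediate_trip_purpose is not shown here, but conceptually each intermediate trip
# draws a purpose from its segment's row of the probability spec. A hedged, self-contained
# sketch of that draw; the purpose names and probabilities are invented for illustration.
import numpy as np
import pandas as pd

purpose_probs = pd.Series({'shopping': 0.5, 'othmaint': 0.3, 'eatout': 0.2})

def draw_purpose_sketch(probs, rand):
    """probs: purpose probabilities for one segment; rand: uniform draw in [0, 1)."""
    cum = probs.cumsum()
    return cum.index[np.searchsorted(cum.values, rand, side='right')]

print(draw_purpose_sketch(purpose_probs, rand=0.65))  # -> 'othmaint'
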
Example 32
def atwork_subtour_frequency_settings(configs_dir):
    return config.read_model_settings(configs_dir, 'atwork_subtour_frequency.yaml')
Example 33
def run_trip_purpose(trips_df, estimator, chunk_size, trace_hh_id,
                     trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    Each intermediate stop on a tour (i.e. each trip other than the last outbound or inbound trip)
    is assigned a purpose based on an observed frequency distribution.

    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    # uniform across trip_purpose
    chunk_tag = 'trip_purpose'

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name),
                             comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # estimator.write_coefficients(coefficients_df, model_settings)

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    # FIXME should be lower case for consistency?
    purpose = pd.Series(np.where(purpose == 'atwork', 'Work', 'Home'),
                        index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(df=trips_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    for i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(trips_df, chunk_size, chunk_tag, trace_label):

        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            estimator,
            trace_hh_id,
            trace_label=chunk_trace_label)

        result_list.append(choices)

        chunk.log_df(trace_label, 'result_list', result_list)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
Example 34
def compute_columns(df, model_settings, locals_dict={}, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        or if None, expect name of pipeline table to be specified by DF in model_settings
    model_settings : dict or str
        dict with keys:
            DF - df_alias and (additionally, if df is None) name of pipeline table to load as df
            SPEC - name of expressions file (csv suffix optional) if different from model_settings
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str:
            name of yaml file in configs_dir to load dict from
    locals_dict : dict
        dict of locals (e.g. utility functions) to add to the execution environment
    trace_label

    Returns
    -------
    results: pandas.DataFrame
        one column for each expression (except temps with ALL_CAP target names)
        same index as df
    """

    if isinstance(model_settings, str):
        model_settings_name = model_settings
        model_settings = config.read_model_settings('%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % model_settings_name
    else:
        model_settings_name = 'dict'
        assert isinstance(model_settings, dict)

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % model_settings_name

    df_name = model_settings.get('DF')
    helper_table_names = model_settings.get('TABLES', [])
    expressions_spec_name = model_settings.get('SPEC', model_settings_name)

    assert expressions_spec_name is not None, \
        "Expected to find 'SPEC' in %s" % model_settings_name

    trace_label = tracing.extend_trace_label(trace_label or '', expressions_spec_name)

    if not expressions_spec_name.endswith(".csv"):
        expressions_spec_name = '%s.csv' % expressions_spec_name
    expressions_spec = assign.read_assignment_spec(config.config_file_path(expressions_spec_name))

    assert expressions_spec.shape[0] > 0, \
        "Expected to find some assignment expressions in %s" % expressions_spec_name

    tables = {t: inject.get_table(t).to_frame() for t in helper_table_names}

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_name not in tables, "Did not expect to find df '%s' in TABLES" % df_name
    tables[df_name] = df

    # be nice and also give it to them as df?
    tables['df'] = df

    _locals_dict = local_utilities()
    _locals_dict.update(locals_dict)
    _locals_dict.update(tables)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(expressions_spec,
                                  df,
                                  _locals_dict,
                                  trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results,
                         label=trace_label,
                         slicer='NONE')

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="%s_locals" % trace_label)

    return results
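
# Conceptually, an assignment spec like the one compute_columns loads is a list of
# (target, expression) rows evaluated top to bottom against the df, with ALL_CAPS targets
# treated as temporaries that later expressions can use but that are not returned. A
# simplified, self-contained sketch of that idea (not the assign.assign_variables code):
import pandas as pd

def assign_variables_sketch(spec, df, locals_dict=None):
    _locals = {'df': df, **(locals_dict or {})}
    results = pd.DataFrame(index=df.index)
    for target, expression in spec:
        value = eval(expression, {}, {**_locals, **{c: results[c] for c in results}})
        if target.isupper():
            _locals[target] = value  # temporary, available to later expressions only
        else:
            results[target] = value
    return results

example_spec = [
    ('MAX_AGE', 'df.age.max()'),
    ('age_frac', 'df.age / MAX_AGE'),
]
print(assign_variables_sketch(example_spec, pd.DataFrame({'age': [20, 40]})))
#    age_frac
# 0       0.5
# 1       1.0
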
Example 35
def work_from_home(persons_merged, persons, chunk_size, trace_hh_id):
    """
    This model predicts whether a person (worker) works from home. The output
    from this model is TRUE (if works from home) or FALSE (works away from home).
    The workplace location choice is overridden for workers who work from home
    and set to -1.

    """

    trace_label = 'work_from_home'
    model_settings_file_name = 'work_from_home.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('work_from_home')

    constants = config.get_model_constants(model_settings)
    work_from_home_alt = model_settings['WORK_FROM_HOME_ALT']

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    # - iterative what-if if specified
    iterations = model_settings.get('WORK_FROM_HOME_ITERATIONS', 1)
    iterations_coefficient_constant = model_settings.get(
        'WORK_FROM_HOME_COEFFICIENT_CONSTANT', None)
    iterations_target_percent = model_settings.get(
        'WORK_FROM_HOME_TARGET_PERCENT', None)
    iterations_target_percent_tolerance = model_settings.get(
        'WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE', None)

    for iteration in range(iterations):

        logger.info("Running %s with %d persons iteration %d", trace_label,
                    len(choosers), iteration)

        # re-read spec to reset substitution
        model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
        model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                                estimator)

        choices = simulate.simple_simulate(choosers=choosers,
                                           spec=model_spec,
                                           nest_spec=nest_spec,
                                           locals_d=constants,
                                           chunk_size=chunk_size,
                                           trace_label=trace_label,
                                           trace_choice_name='work_from_home',
                                           estimator=estimator)

        if iterations_target_percent is not None:
            current_percent = ((choices == work_from_home_alt).sum() /
                               len(choices))
            logger.info(
                "Running %s iteration %i current percent %f target percent %f",
                trace_label, iteration, current_percent,
                iterations_target_percent)

            if abs(current_percent - iterations_target_percent) \
                    <= iterations_target_percent_tolerance:
                logger.info(
                    "Running %s iteration %i converged with coefficient %f",
                    trace_label, iteration,
                    coefficients_df.value[iterations_coefficient_constant])
                break

            else:
                new_value = np.log(
                    iterations_target_percent /
                    np.maximum(current_percent, 0.0001)
                ) + coefficients_df.value[iterations_coefficient_constant]
                coefficients_df.value[
                    iterations_coefficient_constant] = new_value
                logger.info(
                    "Running %s iteration %i new coefficient for next iteration %f",
                    trace_label, iteration, new_value)
                iteration = iteration + 1

    choices = (choices == work_from_home_alt)

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'work_from_home')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['work_from_home'] = choices.reindex(
        persons.index).fillna(0).astype(bool)
    persons[dest_choice_column_name] = np.where(
        persons.work_from_home, -1, persons[dest_choice_column_name])

    pipeline.replace_table("persons", persons)

    tracing.print_summary('work_from_home',
                          persons.work_from_home,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
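
# The what-if iteration above nudges a calibration constant until the predicted work-from-home
# share approaches the target: each pass adds log(target / current) to the coefficient. A toy
# numeric sketch of that update rule; the target, tolerance, starting coefficient, and the
# pretend per-iteration shares are all made up.
import numpy as np

target_percent = 0.20
tolerance = 0.01
coefficient = -1.5

for current_percent in (0.12, 0.17, 0.195):  # pretend model output for each iteration
    if abs(current_percent - target_percent) <= tolerance:
        break
    coefficient += np.log(target_percent / max(current_percent, 0.0001))
    print(f"adjusted coefficient to {coefficient:.4f} for the next iteration")
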
def non_mandatory_tour_frequency_settings(configs_dir):
    return config.read_model_settings(configs_dir, 'non_mandatory_tour_frequency.yaml')
Example 37
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings_file_name = 'non_mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives = simulate.read_model_alts('non_mandatory_tour_frequency_alternatives.csv', set_index=None)
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # filter based on results of CDAP
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    spec_segments = model_settings.get('SPEC_SEGMENTS', {})

    # segment by person type and pick the right spec for each person type
    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings['NAME']
        ptype = segment_settings['PTYPE']

        # pick the spec column for the segment
        segment_spec = model_spec[[segment_name]]

        chooser_segment = choosers[choosers.ptype == ptype]

        logger.info("Running segment '%s' of size %d", segment_name, len(chooser_segment))

        if len(chooser_segment) == 0:
            # skip empty segments
            continue

        estimator = \
            estimation.manager.begin_estimation(model_name='non_mandatory_tour_frequency_%s' % segment_name,
                                                bundle_name='non_mandatory_tour_frequency')

        coefficients_df = simulate.read_model_coefficients(file_name=segment_settings['COEFFICIENTS'])
        segment_spec = simulate.eval_coefficients(segment_spec, coefficients_df, estimator)

        if estimator:
            estimator.write_spec(model_settings)
            estimator.write_model_settings(model_settings, model_settings_file_name)
            estimator.write_coefficients(coefficients_df)
            estimator.write_choosers(chooser_segment)
            estimator.write_alternatives(alternatives)

            # FIXME estimation_requires_chooser_id_in_df_column do it here or have interaction_simulate do it?
            # chooser index must be duplicated in column or it will be omitted from interaction_dataset
            # estimation requires that chooser_id is either in index or a column of interaction_dataset
            # so it can be reformatted (melted) and indexed by chooser_id and alt_id
            assert chooser_segment.index.name == 'person_id'
            assert 'person_id' not in chooser_segment.columns
            chooser_segment['person_id'] = chooser_segment.index

            # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables?
            estimator.set_alt_id('alt_id')

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = interaction_simulate(
            chooser_segment,
            alternatives,
            spec=segment_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % segment_name,
            trace_choice_name='non_mandatory_tour_frequency',
            estimator=estimator)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(choices, 'persons', 'non_mandatory_tour_frequency')
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

        # FIXME - force garbage collection?
        force_garbage_collect()

    del alternatives['tot_tours']  # del tot_tours column we added above

    # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    # is the index value of the chosen alternative in the alternatives table.
    choices = pd.concat(choices_list).sort_index()

    # add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    no_tours_alt = alternatives.index[alternatives.sum(axis=1) == 0][0]
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tour frequencies, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)

    But before we do that, we run an additional probabilistic step to extend/increase tour counts
    beyond the strict limits of the tour_frequency alternatives chosen above (which are currently limited
    to at most 2 escort tours and 1 each of shopping, othmaint, othdiscr, eatout, and social tours)

    The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate is simply the
    index value of the chosen alternative in the alternatives table.

    get counts of each of the tour type alternatives (so we can extend)
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """

    # counts of each of the tour type alternatives (so we can extend)
    modeled_tour_counts = alternatives.loc[choices]
    modeled_tour_counts.index = choices.index  # assign person ids to the index

    # - extend_tour_counts - probabilistic
    extended_tour_counts = \
        extend_tour_counts(choosers, modeled_tour_counts.copy(), alternatives,
                           trace_hh_id, tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    num_modeled_tours = modeled_tour_counts.sum().sum()
    num_extended_tours = extended_tour_counts.sum().sum()
    logger.info("extend_tour_counts increased tour count by %s from %s to %s" %
                (num_extended_tours - num_modeled_tours, num_modeled_tours, num_extended_tours))

    """
    create the non_mandatory tours based on extended_tour_counts
    """
    if estimator:
        override_tour_counts = \
            estimation.manager.get_survey_values(extended_tour_counts,
                                                 table_name='persons',
                                                 column_names=['_%s' % c for c in extended_tour_counts.columns])
        override_tour_counts = \
            override_tour_counts.rename(columns={('_%s' % c): c for c in extended_tour_counts.columns})
        logger.info("estimation get_survey_values override_tour_counts %s changed cells" %
                    (override_tour_counts != extended_tour_counts).sum().sum())
        extended_tour_counts = override_tour_counts

    """
    create the non_mandatory tours based on extended_tour_counts
    """
    non_mandatory_tours = process_non_mandatory_tours(persons, extended_tour_counts)
    assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()

    if estimator:
        # make sure they created tours with the expected tour_ids

        columns = ['person_id', 'household_id', 'tour_type', 'tour_category']
        survey_tours = \
            estimation.manager.get_survey_values(non_mandatory_tours,
                                                 table_name='tours',
                                                 column_names=columns)

        tours_differ = (non_mandatory_tours[columns] != survey_tours[columns]).any(axis=1)

        if tours_differ.any():
            print("tours_differ\n%s" % tours_differ)
            print("%s of %s tours differ" % (tours_differ.sum(), len(tours_differ)))
            print("differing survey_tours\n%s" % survey_tours[tours_differ])
            print("differing modeled_tours\n%s" % non_mandatory_tours[columns][tours_differ])

        assert(not tours_differ.any())

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
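
# --- illustrative sketch (not part of the model above, data made up) ---
# Minimal example of the bookkeeping used above: the choice returned by
# interaction_simulate is the index of the chosen row in the alternatives table,
# alternatives.loc[choices] converts that into per-person tour counts, and persons
# who were not choosers are backfilled with the zero-tour alternative.
import numpy as np
import pandas as pd

alternatives = pd.DataFrame({'escort': [0, 1, 2], 'shopping': [0, 1, 0]},
                            index=pd.Index([0, 1, 2], name='alt'))

# choices are indexed by person_id; values are alternative ids
choices = pd.Series([2, 1], index=pd.Index([23750, 23751], name='person_id'))

tour_counts = alternatives.loc[choices]
tour_counts.index = choices.index           # re-key tour counts by person_id

all_persons = pd.Index([23750, 23751, 23752], name='person_id')
no_tours_alt = alternatives.index[alternatives.sum(axis=1) == 0][0]
frequency = choices.reindex(all_persons).fillna(no_tours_alt).astype(np.int8)
print(tour_counts)
print(frequency)
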
Example 38
def add_size_tables():
    """
    inject tour_destination_size_terms tables for each model_selector (e.g. school, workplace)

    Size tables are pandas dataframes with locations counts for model_selector by zone and segment
    tour_destination_size_terms

    if using shadow pricing, we scale size_table counts to sample population
    (in which case, they have to be created while single-process)

    Scaling is problematic as it breaks household result replicability across sample sizes
    It also changes the magnitude of the size terms so if they are used as utilities in
    expression files, their importance will diminish relative to other utilities as the sample
    size decreases.

    Scaling makes most sense for a full sample in conjunction with shadow pricing, where
    shadow prices can be adjusted iteratively to bring modelled counts into line with desired
    (size table) counts.
    """

    use_shadow_pricing = bool(config.setting('use_shadow_pricing'))

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')
    shadow_pricing_models = shadow_settings.get('shadow_pricing_models')

    if shadow_pricing_models is None:
        logger.warning(
            'shadow_pricing_models list not found in shadow_pricing settings')
        return

    # probably ought not scale if not shadow_pricing (breaks partial sample replicability)
    # but this allows compatibility with existing CTRAMP behavior...
    scale_size_table = shadow_settings.get('SCALE_SIZE_TABLE', False)

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    # since these are scaled to model size, they have to be created while single-process

    for model_selector, model_name in shadow_pricing_models.items():

        model_settings = config.read_model_settings(model_name)

        assert model_selector == model_settings['MODEL_SELECTOR']

        assert 'SEGMENT_IDS' in model_settings, f"missing SEGMENT_IDS setting in {model_name} model_settings"
        segment_ids = model_settings['SEGMENT_IDS']
        chooser_table_name = model_settings['CHOOSER_TABLE_NAME']
        chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

        choosers_df = inject.get_table(chooser_table_name).to_frame()
        if 'CHOOSER_FILTER_COLUMN_NAME' in model_settings:
            choosers_df = \
                choosers_df[choosers_df[model_settings['CHOOSER_FILTER_COLUMN_NAME']] != 0]

        # - raw_desired_size
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        raw_size = tour_destination_size_terms(land_use, size_terms,
                                               model_selector)
        assert set(raw_size.columns) == set(segment_ids.keys())

        if use_shadow_pricing or scale_size_table:

            # - scale size_table counts to sample population
            # scaled_size = zone_size * (total_segment_modeled / total_segment_desired)

            # segment scale factor (modeled / desired) keyed by segment_name
            segment_scale_factors = {}
            for c in raw_size:
                # number of zone demographics desired destination choices
                segment_desired_size = raw_size[c].astype(np.float64).sum()

                # number of synthetic population choosers in segment
                segment_chooser_count = \
                    (choosers_df[chooser_segment_column] == segment_ids[c]).sum()

                segment_scale_factors[c] = \
                    segment_chooser_count / np.maximum(segment_desired_size, 1)

                logger.info("add_desired_size_tables %s segment %s "
                            "desired %s modeled %s scale_factor %s" %
                            (chooser_table_name, c, segment_desired_size,
                             segment_chooser_count, segment_scale_factors[c]))

            # FIXME - should we be rounding?
            scaled_size = (raw_size * segment_scale_factors).round()
        else:
            scaled_size = raw_size

        logger.debug(
            f"add_size_table {size_table_name(model_selector)} ({scaled_size.shape}) for {model_selector}"
        )

        assert scaled_size.index.is_monotonic_increasing, \
            f"size table {size_table_name(model_selector)} not is_monotonic_increasing"

        inject.add_table(size_table_name(model_selector), scaled_size)
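
# --- illustrative sketch (not part of add_size_tables above, numbers made up) ---
# Minimal example of the scaling applied above: each segment's size terms are
# multiplied by (modeled choosers / desired size) so that the scaled table sums
# to the number of synthetic-population choosers in that segment.
import numpy as np
import pandas as pd

raw_size = pd.DataFrame({'low': [100.0, 300.0], 'high': [50.0, 150.0]},
                        index=pd.Index([1, 2], name='zone_id'))

segment_chooser_counts = {'low': 200, 'high': 100}    # synthetic population by segment

segment_scale_factors = {c: segment_chooser_counts[c] / np.maximum(raw_size[c].sum(), 1)
                         for c in raw_size}
scaled_size = (raw_size * pd.Series(segment_scale_factors)).round()
print(scaled_size)    # 'low' column sums to ~200, 'high' column to ~100
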
Example 39
def tour_od_choice(
        tours,
        persons,
        households,
        land_use,
        network_los,
        chunk_size,
        trace_hh_id):

    """Simulates joint origin/destination choice for all tours.

    Given a set of previously generated tours, each tour needs to have an
    origin and a destination. In this case tours are the choosers, but
    the associated person that's making the tour does not necessarily have
    a home location assigned already. So we choose a tour origin at the same
    time as we choose a tour destination, and assign the tour origin as that
    person's home location.

    Parameters
    ----------
    tours : orca.DataFrameWrapper
        lazy-loaded tours table
    persons : orca.DataFrameWrapper
        lazy-loaded persons table
    households : orca.DataFrameWrapper
        lazy-loaded households table
    land_use : orca.DataFrameWrapper
        lazy-loaded land use data table
    network_los : orca._InjectableFuncWrapper
        lazy-loaded activitysim.los.Network_LOS object
    chunk_size : int
        simulation chunk size, set in main settings.yaml
    trace_hh_id : int
        households to trace, set in main settings.yaml
    """

    trace_label = 'tour_od_choice'
    model_settings_file_name = 'tour_od_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    origin_col_name = model_settings['ORIG_COL_NAME']
    dest_col_name = model_settings['DEST_COL_NAME']
    alt_id_col = tour_od.get_od_id_col(origin_col_name, dest_col_name)

    sample_table_name = model_settings.get('OD_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    logsum_column_name = model_settings.get('OD_CHOICE_LOGSUM_COLUMN_NAME', None)
    want_logsums = logsum_column_name is not None

    tours = tours.to_frame()

    # interaction_sample_simulate insists choosers appear in same order as alts
    tours = tours.sort_index()

    estimator = estimation.manager.begin_estimation('tour_od_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(alt_id_col)
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_od.run_tour_od(
        tours,
        persons,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        assert estimator.want_unsampled_alternatives
        estimator.write_choices(choices_df.choice)
        survey_od = estimator.get_survey_values(
            choices_df.choice, 'tours', [origin_col_name, dest_col_name])
        choices_df[origin_col_name] = survey_od[origin_col_name]
        choices_df[dest_col_name] = survey_od[dest_col_name]
        survey_od[alt_id_col] = tour_od.create_od_id_col(survey_od, origin_col_name, dest_col_name)
        choices_df.choice = survey_od[alt_id_col]
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    tours[origin_col_name] = choices_df[origin_col_name].reindex(tours.index)
    tours[dest_col_name] = choices_df[dest_col_name].reindex(tours.index)
    if want_logsums:
        tours[logsum_column_name] = \
            choices_df['logsum'].reindex(tours.index).astype('float')
    tours['poe_id'] = tours[origin_col_name].map(land_use.to_frame(columns='poe_id').poe_id)

    households = households.to_frame()
    persons = persons.to_frame()
    households[origin_col_name] = tours.set_index('household_id')[origin_col_name].reindex(households.index)
    persons[origin_col_name] = households[origin_col_name].reindex(persons.household_id).values

    # Downstream steps require that persons and households have a 'home_zone_id'
    # column. We assume that if the tour_od_choice model is used, this field is
    # missing from the population data, so it gets inherited from the tour origin
    households['home_zone_id'] = households[origin_col_name]
    persons['home_zone_id'] = persons[origin_col_name]

    pipeline.replace_table("tours", tours)
    pipeline.replace_table("persons", persons)
    pipeline.replace_table("households", households)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="tours_od_choice",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
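
# --- illustrative sketch (not part of tour_od_choice above) ---
# The helpers tour_od.get_od_id_col / tour_od.create_od_id_col are defined elsewhere;
# this is only a plausible sketch of the idea they implement, assuming each
# origin/destination pair is encoded as one combined alternative id so that a single
# choice yields both zones. The column names and id format here are assumptions.
import pandas as pd

choices = pd.DataFrame({'origin': [5, 7], 'destination': [12, 3]},
                       index=pd.Index([101, 102], name='tour_id'))

# encode the pair as one alternative id ...
choices['od_id'] = choices['origin'].astype(str) + '_' + choices['destination'].astype(str)

# ... and split it back into origin / destination columns when needed
split = choices['od_id'].str.split('_', expand=True).astype(int)
choices['origin_back'], choices['destination_back'] = split[0], split[1]
print(choices)
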
Example 40
def tour_mode_choice_simulate(tours, persons_merged, skim_dict, skim_stack,
                              chunk_size, trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'  # FIXME - should be passed in?

    primary_tours = tours.to_frame()
    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" %
                (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type,
                          value_counts=True)

    primary_tours_merged = pd.merge(primary_tours,
                                    persons_merged,
                                    left_on='person_id',
                                    right_index=True,
                                    how='left',
                                    suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='in_period')
    odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='in_period')
    dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    estimator = estimation.manager.begin_estimation('tour_mode_choice')
    if estimator:
        estimator.write_coefficients(
            simulate.read_model_coefficients(model_settings))
        estimator.write_coefficients_template(
            simulate.read_model_coefficient_template(model_settings))
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # FIXME run_tour_mode_choice_simulate writes choosers post-annotation

    choices_list = []
    primary_tours_merged['primary_purpose'] = \
        primary_tours_merged.tour_type.where((primary_tours_merged.tour_type != 'school') |
                                             ~primary_tours_merged.is_university, 'univ')

    for primary_purpose, tours_segment in primary_tours_merged.groupby(
            'primary_purpose'):

        logger.info(
            "tour_mode_choice_simulate primary_purpose '%s' (%s tours)" % (
                primary_purpose,
                len(tours_segment.index),
            ))

        # name index so tracing knows how to slice
        assert tours_segment.index.name == 'tour_id'

        choices_df = run_tour_mode_choice_simulate(
            tours_segment,
            primary_purpose,
            model_settings,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            skims=skims,
            constants=constants,
            estimator=estimator,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label,
                                                   primary_purpose),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices_df' %
                              primary_purpose,
                              choices_df.tour_mode,
                              value_counts=True)

        choices_list.append(choices_df)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    if estimator:
        estimator.write_choices(choices_df.tour_mode)
        choices_df.tour_mode = estimator.get_survey_values(
            choices_df.tour_mode, 'tours', 'tour_mode')
        estimator.write_override_choices(choices_df.tour_mode)
        estimator.end_estimation()

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices_df.tour_mode,
                          value_counts=True)

    # so we can trace with annotations
    assign_in_place(primary_tours, choices_df)

    # but only keep mode choice col
    all_tours = tours.to_frame()
    assign_in_place(all_tours, choices_df)

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(
                             trace_label, mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
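
# --- illustrative sketch (not part of tour_mode_choice_simulate above, data made up) ---
# Minimal example of the Series.where() pattern used above to derive primary_purpose:
# values are kept where the condition is True and replaced with 'univ' where it is
# False (i.e. school tours made by university students).
import pandas as pd

tour_type = pd.Series(['work', 'school', 'school'])
is_university = pd.Series([False, False, True])

primary_purpose = tour_type.where((tour_type != 'school') | ~is_university, 'univ')
print(primary_purpose.tolist())    # ['work', 'school', 'univ']
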
def atwork_subtour_mode_choice(tours, persons_merged, network_los, chunk_size,
                               trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    subtours_merged = \
        pd.merge(subtours, persons_merged.to_frame(),
                 left_on='person_id', right_index=True, how='left')

    logger.info("Running %s with %d subtours" %
                (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type,
                          value_counts=True)

    constants = {}
    constants.update(config.get_model_constants(model_settings))

    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    orig_col_name = 'workplace_zone_id'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name,
                                           dest_key=dest_col_name,
                                           tod_key='out_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name,
                                           dest_key=orig_col_name,
                                           tod_key='in_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation(
        'atwork_subtour_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # FIXME run_tour_mode_choice_simulate writes choosers post-annotation

    choices_df = run_tour_mode_choice_simulate(
        subtours_merged,
        tour_purpose='atwork',
        model_settings=model_settings,
        mode_column_name=mode_column_name,
        logsum_column_name=logsum_column_name,
        network_los=network_los,
        skims=skims,
        constants=constants,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_types in tvpb_mode_path_types.items():

            for direction, skim in zip(['od', 'do'],
                                       [tvpb_logsum_odt, tvpb_logsum_dot]):

                path_type = path_types[direction]
                skim_cache = skim.cache[path_type]

                logger.debug(
                    f"mode {mode} direction {direction} path_type {path_type}")

                for c in skim_cache:

                    dest_col = f'{direction}_{c}'

                    if dest_col not in choices_df:
                        choices_df[
                            dest_col] = np.nan if pd.api.types.is_numeric_dtype(
                                skim_cache[c]) else ''
                    choices_df[dest_col].where(choices_df.tour_mode != mode,
                                               skim_cache[c],
                                               inplace=True)

    if estimator:
        estimator.write_choices(choices_df[mode_column_name])
        choices_df[mode_column_name] = \
            estimator.get_survey_values(choices_df[mode_column_name], 'tours', mode_column_name)
        estimator.write_override_choices(choices_df[mode_column_name])
        estimator.end_estimation()

    tracing.print_summary('%s choices' % trace_label,
                          choices_df[mode_column_name],
                          value_counts=True)

    assign_in_place(tours, choices_df)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(
                             trace_label, mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id')
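
# --- illustrative sketch (not part of atwork_subtour_mode_choice above, data made up) ---
# Minimal example of the Series.where() pattern used above to copy cached TVPB path
# attributes only onto rows whose chosen tour_mode matches the mode being processed;
# other rows keep their existing (NaN or empty) value. The model code uses the
# inplace form; the equivalent assignment form is shown here.
import numpy as np
import pandas as pd

choices_df = pd.DataFrame({'tour_mode': ['WALK_TRANSIT', 'DRIVE', 'WALK_TRANSIT']})
cached_btap = pd.Series([11, 13, 17])    # stand-in for one cached tap-choice column

dest_col = 'od_btap'
if dest_col not in choices_df:
    choices_df[dest_col] = np.nan if pd.api.types.is_numeric_dtype(cached_btap) else ''

choices_df[dest_col] = choices_df[dest_col].where(choices_df.tour_mode != 'WALK_TRANSIT',
                                                  cached_btap)
print(choices_df)    # od_btap filled only for WALK_TRANSIT tours
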
Example 42
def trip_mode_choice(
        trips,
        tours_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in the trip_mode_choice_coeffs.csv coefficient file).

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col, right_key=dest_col,
                                             skim_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    constants = config.get_model_constants(model_settings)
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)
        choices = simulate.simple_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        alts = model_spec.columns
        choices = choices.map(dict(zip(range(len(alts)), alts)))

        # tracing.print_summary('trip_mode_choice %s choices' % primary_purpose,
        #                       choices, value_counts=True)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            trips_segment['trip_mode'] = choices
            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    trips_df = trips.to_frame()
    trips_df['trip_mode'] = choices

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          choices, value_counts=True)

    assert not trips_df.trip_mode.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
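
# --- illustrative sketch (not part of trip_mode_choice above, data made up) ---
# Minimal example of the mapping used above: simple_simulate returns the positional
# index of the chosen alternative, and the spec's columns supply the alternative
# names, so choices are mapped position -> name.
import pandas as pd

alts = pd.Index(['DRIVEALONE', 'SHARED2', 'WALK'])    # stand-in for model_spec.columns
choices = pd.Series([2, 0, 1], index=pd.Index([1, 2, 3], name='trip_id'))

choices = choices.map(dict(zip(range(len(alts)), alts)))
print(choices.tolist())    # ['WALK', 'DRIVEALONE', 'SHARED2']
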
Example 43
def best_transit_path(set_random_seed,
                      network_los,
                      best_transit_path_spec):

    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    omaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    dmaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    tod = np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE)
    od_df = pd.DataFrame({'omaz': omaz, 'dmaz': dmaz, 'tod': tod})

    trace_od = (od_df.omaz[0], od_df.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s" % trace_od)

    # build exploded atap_btap_df

    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    atap_btap_df = network_los.get_tappairs_mazpairs(od_df.omaz, od_df.dmaz,
                                                     ofilter='drive_time',
                                                     dfilter='walk_alightingActual')

    # add in tod column
    atap_btap_df = atap_btap_df.merge(
        right=od_df[['tod']],
        left_on='idx',
        right_index=True,
        how='left'
    )

    logger.info("len od_df %s", len(od_df.index))
    logger.info("len atap_btap_df %s", len(atap_btap_df.index))
    logger.info("avg explosion %s", (len(atap_btap_df.index) / (1.0 * len(od_df.index))))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_oabd_rows = (atap_btap_df.omaz == trace_orig) & (atap_btap_df.dmaz == trace_dest)
    else:
        trace_oabd_rows = None

    constants = config.get_model_constants(model_settings)
    locals_d = {
        'np': np,
        'network_los': network_los
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(best_transit_path_spec, atap_btap_df, locals_d,
                                  trace_rows=trace_oabd_rows)

    # copy results
    for column in results.columns:
        atap_btap_df[column] = results[column]

    # drop rows if no utility
    n = len(atap_btap_df.index)
    atap_btap_df = atap_btap_df.dropna(subset=['utility'])

    logger.info("Dropped %s of %s rows with null utility", n - len(atap_btap_df.index), n)

    # choose max utility
    atap_btap_df = atap_btap_df.sort_values(by='utility').groupby('idx').tail(1)

    if trace_od:

        if not trace_oabd_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s", trace_orig, trace_dest)
        else:

            tracing.trace_df(atap_btap_df,
                             label='best_transit_path',
                             slicer='NONE',
                             transpose=False)

            tracing.trace_df(trace_results,
                             label='trace_best_transit_path',
                             slicer='NONE',
                             transpose=False)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="trace_best_transit_path_locals")
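
# --- illustrative sketch (not part of best_transit_path above, data made up) ---
# Minimal example of the "pick the max-utility row per OD pair" idiom used above:
# sort by utility, then take the last row within each idx group.
import pandas as pd

df = pd.DataFrame({'idx': [0, 0, 1, 1, 1],
                   'utility': [1.2, 3.4, 0.5, 2.2, 1.9]})

best = df.sort_values(by='utility').groupby('idx').tail(1)
print(best)    # one row per idx: the row with the highest utility
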
Example 44
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings = config.read_model_settings('joint_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_joint.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants,
    # they may also have different joint tour masks (free time of all participants)
    # so we either have to chunk processing by joint_tour_num and build the timetable by household,
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts,
        spec=model_spec,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
Example 45
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):

    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    # #bug
    #
    # land_use_df = land_use_df[land_use_df.index % 2 == 1]
    # accessibility_df = accessibility_df[accessibility_df.index.isin(land_use_df.index)].head(5)
    #
    # print "land_use_df", land_use_df.index
    # print "accessibility_df", accessibility_df.index
    # #bug

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
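
# --- illustrative sketch (not part of compute_accessibility above, data made up) ---
# Minimal example of the aggregation used above: results come back as one value per
# (orig, dest) pair in orig-major order, are reshaped to an
# (orig_zone_count, dest_zone_count) matrix, summed across destinations, and logged
# (plus 1, so zero accessibility maps to zero).
import numpy as np

orig_zone_count, dest_zone_count = 2, 3
per_od_measure = np.array([1.0, 2.0, 3.0,     # orig zone 1 to dest zones 1..3
                           0.0, 4.0, 5.0])    # orig zone 2 to dest zones 1..3

data = per_od_measure.reshape(orig_zone_count, dest_zone_count)
accessibility = np.log(data.sum(axis=1) + 1)
print(accessibility)    # one accessibility value per origin zone
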
def atwork_subtour_scheduling(tours, persons_merged, tdd_alts, skim_dict,
                              chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each at-work subtour
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings_file_name = 'tour_scheduling_atwork.yaml'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation(
        'atwork_subtour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    skims = {
        "od_skims": od_skim_wrapper,
    }
    expressions.annotate_preprocessors(subtours, constants, skims,
                                       model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids},
                                index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']],
                                      left_index=True,
                                      right_index=True)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        # we don't need to update timetable because subtours are scheduled inside work trip windows

    choices = vectorize_subtour_scheduling(parent_tours,
                                           subtours,
                                           persons_merged,
                                           tdd_alts,
                                           model_spec,
                                           model_settings,
                                           estimator=estimator,
                                           chunk_size=chunk_size,
                                           trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    tdd_choices = pd.merge(choices.to_frame('tdd'),
                           tdd_alts,
                           left_on=['tdd'],
                           right_index=True,
                           how='left')

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(
            DUMP,
            tt.tour_map(parent_tours,
                        subtours,
                        tdd_alts,
                        persons_id_col='parent_tour_id'), trace_label,
            'tour_map')
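
# --- illustrative sketch (not part of atwork_subtour_scheduling above, data made up) ---
# Minimal example of the merge used above: choices are tdd alternative ids, and
# joining them against the tdd_alts table attaches start, end and duration to each tour.
import pandas as pd

tdd_alts = pd.DataFrame({'start': [8, 8, 9], 'end': [9, 10, 17], 'duration': [1, 2, 8]},
                        index=pd.Index([0, 1, 2], name='tdd'))

choices = pd.Series([2, 0], index=pd.Index([501, 502], name='tour_id'))

tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on='tdd', right_index=True, how='left')
print(tdd_choices)    # tour_id-indexed rows with tdd, start, end, duration
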
Example 47
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  The ending format is
    the same as for non_mandatory_tours except the tour types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
Example 48
    def __init__(self, model_settings, num_processes, shared_data=None, shared_data_lock=None):
        """

        Presence of shared_data is used as a flag for multiprocessing
        If we are multiprocessing, shared_data should be a multiprocessing.RawArray buffer
        to aggregate modeled_size across all sub-processes, and shared_data_lock should be
        a multiprocessing.Lock object to coordinate access to that buffer.

        Optionally load saved shadow_prices from data_dir if config setting use_shadow_pricing
        and shadow_setting LOAD_SAVED_SHADOW_PRICES are both True

        Parameters
        ----------
        model_settings : dict
        shared_data : numpy array wrapping a multiprocessing.RawArray, or None (if single process)
        shared_data_lock : multiprocessing.Lock, or None (if single process)
        """

        self.num_processes = num_processes
        self.use_shadow_pricing = bool(config.setting('use_shadow_pricing'))
        self.saved_shadow_price_file_path = None  # set by read_saved_shadow_prices if loaded

        self.model_selector = model_settings['MODEL_SELECTOR']

        full_model_run = config.setting('households_sample_size') == 0
        if self.use_shadow_pricing and not full_model_run:
            logging.warning("deprecated combination of use_shadow_pricing and not full_model_run")

        self.segment_ids = model_settings['SEGMENT_IDS']

        # - modeled_size (set by call to set_choices/synchronize_choices)
        self.modeled_size = None

        if self.use_shadow_pricing:
            self.shadow_settings = config.read_model_settings('shadow_pricing.yaml')

            for k in self.shadow_settings:
                logger.debug("shadow_settings %s: %s" % (k, self.shadow_settings.get(k)))

        # - destination_size_table (desired_size)
        self.desired_size = inject.get_table(size_table_name(self.model_selector)).to_frame()

        # - shared_data
        if shared_data is not None:
            assert shared_data.shape[0] == self.desired_size.shape[0]
            assert shared_data.shape[1] == self.desired_size.shape[1] + 1  # tally column
            assert shared_data_lock is not None
        self.shared_data = shared_data
        self.shared_data_lock = shared_data_lock

        # - load saved shadow_prices (if available) and set max_iterations accordingly
        if self.use_shadow_pricing:
            self.shadow_prices = None
            self.shadow_price_method = self.shadow_settings['SHADOW_PRICE_METHOD']
            assert self.shadow_price_method in ['daysim', 'ctramp']

            if self.shadow_settings['LOAD_SAVED_SHADOW_PRICES']:
                # read_saved_shadow_prices logs error and returns None if file not found
                self.shadow_prices = self.read_saved_shadow_prices(model_settings)

            if self.shadow_prices is None:
                self.max_iterations = self.shadow_settings.get('MAX_ITERATIONS', 5)
            else:
                self.max_iterations = self.shadow_settings.get('MAX_ITERATIONS_SAVED', 1)

            # initial_shadow_price if we did not load
            if self.shadow_prices is None:
                # initial value depends on method
                initial_shadow_price = 1.0 if self.shadow_price_method == 'ctramp' else 0.0
                self.shadow_prices = \
                    pd.DataFrame(data=initial_shadow_price,
                                 columns=self.desired_size.columns,
                                 index=self.desired_size.index)
        else:
            self.max_iterations = 1

        self.num_fail = pd.DataFrame(index=self.desired_size.columns)
        self.max_abs_diff = pd.DataFrame(index=self.desired_size.columns)
        self.max_rel_diff = pd.DataFrame(index=self.desired_size.columns)
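
# --- illustrative sketch (not part of the shadow pricing constructor above) ---
# A minimal, self-contained example of the buffer layout the constructor asserts:
# a shared block with one row per zone and one column per segment plus a tally
# column, wrapped as a numpy array so every process accumulates into the same
# modeled_size counts. The dtype and the position of the tally column are
# assumptions here, not taken from the model code.
import ctypes
import multiprocessing
import numpy as np

num_zones, num_segments = 4, 2
buffer = multiprocessing.RawArray(ctypes.c_int64, num_zones * (num_segments + 1))

shared_data = np.frombuffer(buffer, dtype=np.int64).reshape(num_zones, num_segments + 1)
shared_data_lock = multiprocessing.Lock()

with shared_data_lock:
    shared_data[:, 0] += 1    # e.g. tally modeled choices for segment 0
print(shared_data)
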
def parking_location(trips, trips_merged, land_use, network_los, chunk_size,
                     trace_hh_id):
    """
    Given a set of trips, each trip needs to have a parking location if
    it is eligible for remote parking.
    """

    trace_label = 'parking_location'
    model_settings = config.read_model_settings('parking_location_choice.yaml')
    alt_destination_col_name = model_settings['ALT_DEST_COL_NAME']

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    trips_df = trips.to_frame()
    trips_merged_df = trips_merged.to_frame()
    land_use_df = land_use.to_frame()

    locals_dict = {'network_los': network_los}
    locals_dict.update(config.get_model_constants(model_settings))

    if preprocessor_settings:
        expressions.assign_columns(df=trips_merged_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    parking_locations, save_sample_df = run_parking_destination(
        model_settings,
        trips_merged_df,
        land_use_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
    )

    assign_in_place(trips_df,
                    parking_locations.to_frame(alt_destination_col_name))

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get(
            'PARKING_LOCATION_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
Example 50
def compute_logsums(
        primary_purpose,
        trips,
        destination_sample,
        tours_merged,
        model_settings,
        skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it twice
    for each alternative since we also need the out-of-direction logsum
    (i.e. origin to alt_dest, and alt_dest to half-tour destination)

    Returns
    -------
        adds od_logsum and dp_logsum columns to destination_sample (in place)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.info("Running %s with %d samples", trace_label, destination_sample.shape[0])

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # - choosers - merge destination_sample and trips_merged
    # re/set index because pandas merge does not preserve left index if it has duplicate values!
    choosers = pd.merge(destination_sample,
                        trips_merged.reset_index(),
                        left_index=True,
                        right_on='trip_id',
                        how="left",
                        suffixes=('', '_r')).set_index('trip_id')
    assert choosers.index.equals(destination_sample.index)

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    omnibus_coefficient_spec = \
        assign.read_constant_spec(config.config_file_path(logsum_settings['COEFFS']))

    coefficient_spec = omnibus_coefficient_spec[primary_purpose]

    constants = config.get_model_constants(logsum_settings)
    locals_dict = assign.evaluate_constants(coefficient_spec, constants=constants)
    locals_dict.update(constants)

    # - od_logsums
    od_skims = {
        'ORIGIN': model_settings['TRIP_ORIGIN'],
        'DESTINATION': model_settings['ALT_DEST'],
        "odt_skims": skims['odt_skims'],
        "od_skims": skims['od_skims'],
    }
    destination_sample['od_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        od_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'od'))

    # - dp_logsums
    dp_skims = {
        'ORIGIN': model_settings['ALT_DEST'],
        'DESTINATION': model_settings['PRIMARY_DEST'],
        "odt_skims": skims['dpt_skims'],
        "od_skims": skims['dp_skims'],
    }
    destination_sample['dp_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        dp_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'dp'))
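
# Hedged illustration of the "out-of-direction" idea documented above (zone numbers
# are made up): for a candidate stop zone k on a trip from origin o toward the
# half-tour (primary) destination d, two mode-choice logsums are computed, one per leg.
example_o, example_k, example_d = 5, 12, 30
example_legs = {
    'od_logsum': (example_o, example_k),  # trip origin -> sampled alternative destination
    'dp_logsum': (example_k, example_d),  # sampled alternative destination -> primary destination
}
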
Example 51
def mandatory_scheduling_settings(configs_dir):
    return config.read_model_settings(configs_dir, 'mandatory_scheduling.yaml')


def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtour tours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'
    model_settings_file_name = 'atwork_subtour_frequency.yaml'

    tours = tours.to_frame()
    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    alternatives = simulate.read_model_alts(
        'atwork_subtour_frequency_alternatives.csv', set_index='alt')

    # merge persons into work_tours
    persons_merged = persons_merged.to_frame()
    work_tours = pd.merge(work_tours,
                          persons_merged,
                          left_on='person_id',
                          right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours",
                len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        expressions.assign_columns(df=work_tours,
                                   model_settings=preprocessor_settings,
                                   trace_label=trace_label)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(work_tours)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours',
                                              'atwork_subtour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    tours = pipeline.extend_table("tours", subtours)

    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    tracing.print_summary('atwork_subtour_frequency',
                          tours.atwork_subtour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_frequency.tours')
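
# Minimal sketch of the positional-choice-to-name conversion used above, with a
# made-up spec: simple_simulate returns the column position of the chosen
# alternative, and the spec's column labels supply the name.
import pandas as pd

example_spec_columns = pd.Index(['no_subtours', 'eat', 'business1'])
example_positions = pd.Series([0, 2, 1], index=[11, 12, 13])  # hypothetical tour ids
example_names = pd.Series(example_spec_columns[example_positions.values],
                          index=example_positions.index)
# example_names -> 11: 'no_subtours', 12: 'business1', 13: 'eat'
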
Example 53
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'

    model_settings_file_name = 'joint_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('joint_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    choices = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts, timetable,
        spec=model_spec,
        model_settings=model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):
            nth_participants = \
                joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

            estimator.log("assign timetable for %s participants in %s tours with tour_num %s" %
                          (len(nth_participants), len(nth_tours), tour_num))
            # - update timetables of all joint tour participants
            timetable.assign(nth_participants.person_id, reindex(choices, nth_participants.tour_id))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts, left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
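
# Hedged sketch (hypothetical alternative ids) of how tdd choices pick up start, end
# and duration from the tdd_alts table via the merge above.
import pandas as pd

example_tdd_alts = pd.DataFrame({'start': [5, 5], 'end': [10, 12], 'duration': [5, 7]},
                                index=pd.Index([0, 1], name='tdd'))
example_choices = pd.Series([1, 0], index=pd.Index([201, 202], name='tour_id'))
example_tdd = pd.merge(example_choices.to_frame('tdd'), example_tdd_alts,
                       left_on='tdd', right_index=True, how='left')

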
def trip_purpose_and_destination(
        trips,
        tours_merged,
        chunk_size,
        trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info('trip_destination has already been run. Rerunning failed trips')
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" % (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
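
# Hedged sketch of the retry bookkeeping in the loop above (columns are illustrative):
# trips that failed stay in the working table for the next iteration, while the rest
# are appended to results; in the real model their leg-mates are also flagged first.
import pandas as pd

example_trips = pd.DataFrame({'tour_id': [1, 1, 2], 'failed': [True, False, False]},
                             index=pd.Index([10, 11, 12], name='trip_id'))
example_good = example_trips[~example_trips.failed]
example_retry = example_trips[example_trips.failed]
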
Example 55
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     network_los, skims, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    with chunk.chunk_log(trace_label):
        logsum_settings = config.read_model_settings(
            model_settings['LOGSUM_SETTINGS'])

        choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
        logger.info(
            f"{trace_label} compute_logsums for {choosers.shape[0]} choosers {alt_tdd.shape[0]} alts"
        )

        # - locals_dict
        constants = config.get_model_constants(logsum_settings)
        locals_dict = {}
        locals_dict.update(constants)

        if network_los.zone_system == los.THREE_ZONE:
            # TVPB constants can appear in expressions
            locals_dict.update(
                network_los.setting(
                    'TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

        locals_dict.update(skims)

        # constrained coefficients can appear in expressions
        coefficients = simulate.get_segment_coefficients(
            logsum_settings, tour_purpose)
        locals_dict.update(coefficients)

        # - run preprocessor to annotate choosers
        # allow specification of alternate preprocessor for nontour choosers
        preprocessor = model_settings.get('LOGSUM_PREPROCESSOR',
                                          'preprocessor')
        preprocessor_settings = logsum_settings[preprocessor]

        if preprocessor_settings:

            simulate.set_skim_wrapper_targets(choosers, skims)

            expressions.assign_columns(df=choosers,
                                       model_settings=preprocessor_settings,
                                       locals_dict=locals_dict,
                                       trace_label=trace_label)

        # - compute logsums
        logsum_spec = simulate.read_model_spec(
            file_name=logsum_settings['SPEC'])
        logsum_spec = simulate.eval_coefficients(logsum_spec,
                                                 coefficients,
                                                 estimator=None)

        nest_spec = config.get_logit_model_settings(logsum_settings)
        nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                    trace_label)

        logsums = simulate.simple_simulate_logsums(choosers,
                                                   logsum_spec,
                                                   nest_spec,
                                                   skims=skims,
                                                   locals_d=locals_dict,
                                                   chunk_size=0,
                                                   trace_label=trace_label)

    return logsums
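
# A loose, hypothetical illustration of the coefficient substitution above: labels in
# the spec's utility columns are looked up in the segment's coefficients so the spec
# can be evaluated numerically (eval_coefficients does this with more bookkeeping).
import pandas as pd

example_spec = pd.DataFrame({'DRIVE': ['coef_ivt', 1.0], 'WALK': [0.0, 0.0]},
                            index=['in_vehicle_time', 'asc_drive'])
example_coefficients = {'coef_ivt': -0.025}
example_spec_numeric = example_spec.replace(example_coefficients)
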
Example 56
def cdap_simulate(persons_merged, persons, households,
                  cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions,
                  chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    trace_label = 'cdap'
    model_settings = config.read_model_settings('cdap.yaml')

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    cdap_interaction_coefficients = \
        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

    # specs are built just-in-time on demand and cached as injectables
    # prebuilding here allows us to write them to the output directory
    # (also when multiprocessing locutor might not see all household sizes)
    logger.info("Pre-building cdap specs")
    for hhsize in range(2, cdap.MAX_HHSIZE + 1):
        spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True)
        if inject.get_injectable('locutor', False):
            spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize), index=True)

    logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

    choices = cdap.run_cdap(
        persons=persons_merged,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    # - assign results to persons table and annotate
    persons = persons.to_frame()

    choices = choices.reindex(persons.index)
    persons['cdap_activity'] = choices.cdap_activity
    persons['cdap_rank'] = choices.cdap_rank

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    # - annotate households table
    households = households.to_frame()
    expressions.assign_columns(
        df=households,
        model_settings=model_settings.get('annotate_households'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
    pipeline.replace_table("households", households)

    tracing.print_summary('cdap_activity', persons.cdap_activity, value_counts=True)
    logger.info("cdap crosstabs:\n%s" %
                pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))

    if trace_hh_id:

        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
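
# Hedged sketch of the summary crosstab printed above, with made-up data: rows are
# person types, columns are the chosen coordinated daily activity patterns
# (e.g. 'M' mandatory, 'N' non-mandatory, 'H' home).
import pandas as pd

example_persons = pd.DataFrame({'ptype': [1, 1, 2], 'cdap_activity': ['M', 'H', 'N']})
print(pd.crosstab(example_persons.ptype, example_persons.cdap_activity, margins=True))
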
Example 57
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to tours.

    Creates a trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()

    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logger.info("%s running segment %s with %s chooser rows" %
                    (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(annotations,
                         label="stop_frequency.annotations",
                         columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
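
# Illustrative only: how a stop_frequency alternative name relates to trip counts
# (2 intermediate outbound stops -> 3 outbound trips; 0 inbound stops -> 1 inbound trip).
example_alt = '2out_0in'
example_out_part, example_in_part = example_alt.split('_')
example_out_stops = int(example_out_part.replace('out', ''))
example_in_stops = int(example_in_part.replace('in', ''))
example_num_trips = (example_out_stops + 1) + (example_in_stops + 1)  # -> 4

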
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers%s alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))

    # - setup skims

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(tour_purpose)

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    omnibus_coefficient_spec = get_coeffecients_spec(logsum_settings)
    coefficient_spec = omnibus_coefficient_spec[tour_purpose]
    coefficients = assign.evaluate_constants(coefficient_spec, constants=constants)

    locals_dict = {}
    locals_dict.update(coefficients)
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - compute logsums
    logsum_spec = get_logsum_spec(logsum_settings)
    nest_spec = config.get_logit_model_settings(logsum_settings)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)

    return logsums
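
# Hedged sketch of the round-trip skim setup above (keys and column names are
# illustrative): the same origin/destination pair is looked up in both directions,
# with the time-of-day key taken from the outbound period for o->d and from the
# inbound period for d->o.
example_skim_keys = {
    'odt_skims': ('TAZ', 'dest_taz', 'out_period'),  # origin -> destination, outbound period
    'dot_skims': ('dest_taz', 'TAZ', 'in_period'),   # destination -> origin, inbound period
}

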
def telecommute_frequency(
        persons_merged, persons,
        chunk_size, trace_hh_id):
    """
    This model predicts the telecommute frequency for a person (worker) who
    does not work from home. The alternatives of this model are 'No Telecommute',
    '1 day per week', '2 to 3 days per week' and '4 days per week'. This model
    reflects the choices of people who prefer a combination of working from home
    and at the office during the week.
    """

    trace_label = 'telecommute_frequency'
    model_settings_file_name = 'telecommute_frequency.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('telecommute_frequency')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='telecommute_frequency',
        estimator=estimator)

    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'telecommute_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['telecommute_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('telecommute_frequency', persons.telecommute_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
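
# Minimal sketch (made-up person ids and labels) of the reindex/fillna step above:
# persons who were not choosers (e.g. non-workers) get an empty-string
# telecommute_frequency rather than NaN.
import pandas as pd

example_persons_index = pd.Index([1, 2, 3], name='person_id')
example_choices = pd.Series(['No_Telecommute', '1_day_week'], index=[1, 3])
example_telecommute = example_choices.reindex(example_persons_index).fillna('').astype(str)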