Example #1
def free_parking(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """

    """

    trace_label = 'free_parking'
    model_settings = config.read_model_settings('free_parking.yaml')

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_taz > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name='free_parking.csv')
    nest_spec = config.get_logit_model_settings(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work')

    persons = persons.to_frame()

    # reindex since we are working with a subset of persons (workers only)
    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)
    persons['free_parking_at_work'] = choices.reindex(persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking', persons.free_parking_at_work, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
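
# A minimal sketch (toy data, not model output) of the reindex/fillna pattern
# used above: choices exist only for workers, but the persons table includes
# everyone, so missing rows become False after the reindex.
import pandas as pd

worker_choices = pd.Series(['free', 'pay'], index=[1, 3])   # workers only
persons_index = pd.Index([1, 2, 3, 4], name='person_id')    # all persons

free_at_work = (worker_choices == 'free').reindex(persons_index).fillna(0).astype(bool)
print(free_at_work.tolist())  # [True, False, False, False]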
Example #2
def run_destination_sample(
        spec_segment_name,
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_label):

    model_spec_file_name = model_settings['SAMPLE_SPEC']
    model_spec = simulate.read_model_spec(file_name=model_spec_file_name)
    model_spec = model_spec[[spec_segment_name]]

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True, how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("running %s with %d tours", trace_label, len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    if origin_col_name == 'TAZ':
        origin_col_name = 'TAZ_chooser'
    skims = skim_dict.wrap(origin_col_name, 'TAZ')

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=model_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
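
# A hedged sketch of why CHOOSER_ORIG_COL_NAME becomes 'TAZ_chooser' above:
# when choosers and alternatives both carry a TAZ column, the interaction
# cross-join must rename one of them. Toy frames standing in for
# logit.interaction_dataset, which is assumed to apply the '_chooser' suffix.
import pandas as pd

choosers = pd.DataFrame({'TAZ': [10, 20]})
alternatives = pd.DataFrame({'TAZ': [1, 2, 3]})

interaction = choosers.add_suffix('_chooser').merge(alternatives, how='cross')
print(interaction.columns.tolist())  # ['TAZ_chooser', 'TAZ']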
Example #3
def run_location_simulate(
        segment_name,
        persons_merged,
        location_sample_df,
        skim_dict,
        dest_size_terms,
        model_settings,
        chunk_size, trace_label):
    """
    run location model on location_sample annotated with mode_choice logsum
    to select a dest zone from sample alternatives
    """
    assert not persons_merged.empty

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = persons_merged[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    alternatives = \
        pd.merge(location_sample_df, dest_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    logger.info("Running %s with %d persons" % (trace_label, len(choosers)))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ_chooser", alt_dest_col_name)

    locals_d = {
        'skims': skims,
        'segment_size': segment_name
    }
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=spec_for_segment(model_spec, segment_name),
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name=model_settings['DEST_CHOICE_COLUMN_NAME'])

    return choices
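
# Toy illustration of selecting a single segment's coefficients from a
# multi-segment spec (what spec_for_segment is assumed to do): double-bracket
# selection keeps a one-column DataFrame rather than collapsing to a Series.
import pandas as pd

model_spec = pd.DataFrame({'work': [1.0, -0.5], 'school': [0.8, -0.2]},
                          index=pd.Index(['size_term', 'distance'], name='Expression'))
segment_spec = model_spec[['work']]
print(segment_spec.shape)  # (2, 1) - still a DataFrame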
Example #4
def trip_destination_sample(
        primary_purpose,
        trips,
        alternatives,
        model_settings,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """

    Returns
    -------
    destination_sample: pandas.DataFrame
        choices_df from interaction_sample with (up to) sample_size alts for each chooser row
        index (non-unique) is trip_id from trips (duplicated for each alt)
        and columns dest_taz, prob, and pick_count

        dest_taz: int
            alt identifier (dest_taz) from alternatives[<alt_col_name>]
        prob: float
            the probability of the chosen alternative
        pick_count : int
            number of duplicate picks for chooser, alt
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_destination_sample')

    spec = get_spec_for_purpose(model_settings, 'DESTINATION_SAMPLE_SPEC', primary_purpose)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST"]

    logger.info("Running %s with %d trips", trace_label, trips.shape[0])

    locals_dict = config.get_model_constants(model_settings).copy()
    locals_dict.update({
        'size_terms': size_term_matrix
    })
    locals_dict.update(skims)

    destination_sample = interaction_sample(
        choosers=trips,
        alternatives=alternatives,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        allow_zero_probs=True,
        spec=spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label)

    return destination_sample
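
# Toy construction of the documented destination_sample shape: a non-unique
# trip_id index (one row per sampled alternative) with dest_taz, prob and
# pick_count columns. Values are made up for illustration.
import pandas as pd

destination_sample = pd.DataFrame(
    {'dest_taz': [5, 9, 9], 'prob': [0.4, 0.3, 0.3], 'pick_count': [1, 2, 1]},
    index=pd.Index([101, 101, 102], name='trip_id'))
print(destination_sample)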
Example #5
def atwork_subtour_destination_sample(
        tours,
        persons_merged,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_location_sample'
    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination_sample.csv')

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("Running atwork_subtour_location_sample with %d tours", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('workplace_taz', 'TAZ')

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=model_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = choosers.person_id

    return choices
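
# Sketch of the index-aligned assignment above (toy data): assigning a Series
# with a unique tour_id index onto a frame whose tour_id index is duplicated
# broadcasts person_id onto every sampled alternative for that tour.
import pandas as pd

choices = pd.DataFrame({'dest_taz': [5, 9]}, index=pd.Index([7, 7], name='tour_id'))
person_id = pd.Series([42], index=pd.Index([7], name='tour_id'))

choices['person_id'] = person_id
print(choices.person_id.tolist())  # [42, 42]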
Example #6
def trip_destination_simulate(
        primary_purpose,
        trips,
        destination_sample,
        model_settings,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Choose a destination from destination_sample (with od_logsum and dp_logsum columns added)

    Returns
    -------
    choices - pandas.Series
        destination alt chosen
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_destination_simulate')

    spec = get_spec_for_purpose(model_settings, 'DESTINATION_SPEC', primary_purpose)

    alt_dest_col_name = model_settings["ALT_DEST"]

    logger.info("Running trip_destination_simulate with %d trips", len(trips))

    locals_dict = config.get_model_constants(model_settings).copy()
    locals_dict.update({
        'size_terms': size_term_matrix
    })
    locals_dict.update(skims)

    destinations = interaction_sample_simulate(
        choosers=trips,
        alternatives=destination_sample,
        spec=spec,
        choice_column=alt_dest_col_name,
        allow_zero_probs=True, zero_prob_choice_val=NO_DESTINATION,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='trip_dest')

    # drop any failed zero_prob destinations
    if (destinations == NO_DESTINATION).any():
        # logger.debug("dropping %s failed destinations", (destinations == NO_DESTINATION).sum())
        destinations = destinations[destinations != NO_DESTINATION]

    return destinations
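
# Sketch of the zero-probability cleanup above, with a stand-in sentinel
# (NO_DESTINATION is assumed to be a reserved value such as -1 here; the
# real constant is defined elsewhere in the module).
import pandas as pd

NO_DESTINATION = -1
destinations = pd.Series([12, NO_DESTINATION, 7], index=[1, 2, 3])

if (destinations == NO_DESTINATION).any():
    destinations = destinations[destinations != NO_DESTINATION]
print(destinations.index.tolist())  # [1, 3]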
Example #7
def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label, len(households_merged))

    model_spec = simulate.read_model_spec(file_name='auto_ownership.csv')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=households_merged.to_frame(),
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership')

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label='auto_ownership',
                         warn_if_empty=True)
Example #8
def run_destination_sample(spec_segment_name, tours, households_merged,
                           model_settings, skim_dict, destination_size_terms,
                           estimator, chunk_size, trace_label):

    spec = simulate.spec_for_segment(model_settings,
                                     spec_id='SAMPLE_SPEC',
                                     segment_name=spec_segment_name,
                                     estimator=estimator)

    # choosers are tours - in a sense tours are choosing their destination
    choosers = pd.merge(tours,
                        households_merged,
                        left_on='household_id',
                        right_index=True,
                        how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings["SAMPLE_SIZE"]
    if estimator:
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices"
            % (trace_label, ))
        sample_size = 0

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    if origin_col_name == 'TAZ':
        origin_col_name = 'TAZ_chooser'
    skims = skim_dict.wrap(origin_col_name, 'TAZ')

    locals_d = {'skims': skims}

    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 spec=spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
Example #9
def trip_mode_choice_simulate(trips_merged, trip_mode_choice_spec,
                              trip_mode_choice_settings, skim_dict, skim_stack,
                              chunk_size, trace_hh_id):
    """
    Trip mode choice simulate
    """
    trace_label = 'trip_mode_choice'

    trips = trips_merged.to_frame()

    nest_spec = config.get_logit_model_settings(trip_mode_choice_settings)
    constants = config.get_model_constants(trip_mode_choice_settings)

    logger.info("Running trip_mode_choice_simulate with %d trips" % len(trips))

    odt_skim_stack_wrapper = skim_stack.wrap(left_key='OTAZ',
                                             right_key='DTAZ',
                                             skim_key="start_period")

    od_skims = skim_dict.wrap('OTAZ', 'DTAZ')

    choices_list = []

    # loop by tour_type in order to easily query the expression coefficient file
    for tour_type, segment in trips.groupby('tour_type'):

        logger.info("running %s tour_type '%s'" % (
            len(segment.index),
            tour_type,
        ))

        # name index so tracing knows how to slice
        segment.index.name = 'trip_id'

        # FIXME - check that destination is not null

        choices = _mode_choice_simulate(
            segment,
            odt_skim_stack_wrapper=odt_skim_stack_wrapper,
            dot_skim_stack_wrapper=None,
            od_skim_stack_wrapper=od_skims,
            spec=get_segment_and_unstack(trip_mode_choice_spec, tour_type),
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='trip_mode_choice')

        # FIXME - no point in printing verbose value_counts now that we have tracing?
        tracing.print_summary('trip_mode_choice_simulate %s choices' %
                              tour_type,
                              choices,
                              value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('trip_mode_choice_simulate all tour type choices',
                          choices,
                          value_counts=True)

    # FIXME - is this a NOP if trips table doesn't exist
    inject.add_column("trips", "trip_mode", choices)

    if trace_hh_id:

        tracing.trace_df(inject.get_table('trips').to_frame(),
                         label="trip_mode",
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    force_garbage_collect()
Example #10
def atwork_subtour_mode_choice_simulate(tours, persons_merged,
                                        tour_mode_choice_spec,
                                        tour_mode_choice_settings, skim_dict,
                                        skim_stack, chunk_size, trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'subtour']
    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged.to_frame(),
                        left_on='person_id',
                        right_index=True)

    nest_spec = config.get_logit_model_settings(tour_mode_choice_settings)
    constants = config.get_model_constants(tour_mode_choice_settings)

    logger.info("Running %s with %d subtours" %
                (trace_label, len(subtours.index)))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours.tour_type,
                          value_counts=True)

    # setup skim keys
    odt_skim_stack_wrapper = skim_stack.wrap(left_key='workplace_taz',
                                             right_key='destination',
                                             skim_key="out_period")
    dot_skim_stack_wrapper = skim_stack.wrap(left_key='destination',
                                             right_key='workplace_taz',
                                             skim_key="in_period")
    od_skims = skim_dict.wrap('workplace_taz', 'destination')

    spec = get_segment_and_unstack(tour_mode_choice_spec, segment='workbased')

    if trace_hh_id:
        tracing.trace_df(spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE',
                         transpose=False)

    choices = _mode_choice_simulate(
        choosers,
        odt_skim_stack_wrapper=odt_skim_stack_wrapper,
        dot_skim_stack_wrapper=dot_skim_stack_wrapper,
        od_skim_stack_wrapper=od_skims,
        spec=spec,
        constants=constants,
        nest_spec=nest_spec,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label,
                          choices,
                          value_counts=True)

    subtours['mode'] = choices
    assign_in_place(tours, subtours[['mode']])

    if trace_hh_id:
        trace_columns = [
            'mode', 'person_id', 'tour_type', 'tour_num', 'parent_tour_id'
        ]
        tracing.trace_df(subtours,
                         label=tracing.extend_trace_label(trace_label, 'mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         columns=trace_columns,
                         warn_if_empty=True)

    force_garbage_collect()
Example #11
def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'
    model_settings_file_name = 'atwork_subtour_frequency.yaml'

    tours = tours.to_frame()
    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    alternatives = simulate.read_model_alts(
        'atwork_subtour_frequency_alternatives.csv', set_index='alt')

    # merge persons into work_tours
    persons_merged = persons_merged.to_frame()
    work_tours = pd.merge(work_tours,
                          persons_merged,
                          left_on='person_id',
                          right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours",
                len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        assign_columns(df=work_tours,
                       model_settings=preprocessor_settings,
                       trace_label=trace_label)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(work_tours)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours',
                                              'atwork_subtour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    tours = pipeline.extend_table("tours", subtours)

    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    tracing.print_summary('atwork_subtour_frequency',
                          tours.atwork_subtour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_frequency.tours')
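
# Minimal sketch of the "convert indexes to alternative names" step above:
# the spec columns are the alternative labels and simple_simulate is assumed
# to return positional indexes into them (toy data).
import pandas as pd

spec_columns = pd.Index(['no_subtours', 'eat', 'business1'])
raw_choices = pd.Series([0, 2, 1], index=pd.Index([11, 12, 13], name='tour_id'))

choices = pd.Series(spec_columns[raw_choices.values], index=raw_choices.index)
print(choices.tolist())  # ['no_subtours', 'business1', 'eat']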
Example #12
def trip_mode_choice(
        trips,
        tours_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in the trip_mode_choice_coeffs.csv coefficient file).

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col, right_key=dest_col,
                                             skim_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    constants = config.get_model_constants(model_settings)
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)
        choices = simulate.simple_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        alts = model_spec.columns
        choices = choices.map(dict(zip(range(len(alts)), alts)))

        # tracing.print_summary('trip_mode_choice %s choices' % primary_purpose,
        #                       choices, value_counts=True)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            trips_segment['trip_mode'] = choices
            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    trips_df = trips.to_frame()
    trips_df['trip_mode'] = choices

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          choices, value_counts=True)

    assert not trips_df.trip_mode.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example #13
def compute_logsums(
        primary_purpose,
        trips,
        destination_sample,
        tours_merged,
        model_settings,
        skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it twice
    for each alternative since we need the out-of-direction logsum
    (i.e. origin to alt_dest, and alt_dest to half-tour destination)

    Returns
    -------
        adds od_logsum and dp_logsum columns to trips (in place)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.info("Running %s with %d samples", trace_label, destination_sample.shape[0])

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # - choosers - merge destination_sample and trips_merged
    # re/set index because pandas merge does not preserve left index if it has duplicate values!
    choosers = pd.merge(destination_sample,
                        trips_merged.reset_index(),
                        left_index=True,
                        right_on='trip_id',
                        how="left",
                        suffixes=('', '_r')).set_index('trip_id')
    assert choosers.index.equals(destination_sample.index)

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    omnibus_coefficient_spec = \
        assign.read_constant_spec(config.config_file_path(logsum_settings['COEFFS']))

    coefficient_spec = omnibus_coefficient_spec[primary_purpose]

    constants = config.get_model_constants(logsum_settings)
    locals_dict = assign.evaluate_constants(coefficient_spec, constants=constants)
    locals_dict.update(constants)

    # - od_logsums
    od_skims = {
        'ORIGIN': model_settings['TRIP_ORIGIN'],
        'DESTINATION': model_settings['ALT_DEST'],
        "odt_skims": skims['odt_skims'],
        "od_skims": skims['od_skims'],
    }
    destination_sample['od_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        od_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'od'))

    # - dp_logsums
    dp_skims = {
        'ORIGIN': model_settings['ALT_DEST'],
        'DESTINATION': model_settings['PRIMARY_DEST'],
        "odt_skims": skims['dpt_skims'],
        "od_skims": skims['dp_skims'],
    }
    destination_sample['dp_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        dp_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'dp'))
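
# Toy reproduction of the merge workaround above: merging on a column and
# restoring the index afterwards keeps a duplicate-valued trip_id index
# intact, which a plain left-index merge is not guaranteed to do.
import pandas as pd

sample = pd.DataFrame({'alt_dest': [5, 9]}, index=pd.Index([101, 101], name='trip_id'))
trips = pd.DataFrame({'depart': [8]}, index=pd.Index([101], name='trip_id'))

merged = pd.merge(sample, trips.reset_index(),
                  left_index=True, right_on='trip_id', how='left').set_index('trip_id')
assert merged.index.equals(sample.index)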
Example #14
def compute_accessibility(accessibility, network_los, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(
        config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" %
                (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)

    land_use_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()
    land_use_df = land_use_df[land_use_columns]

    # don't assume they are the same: accessibility may be sliced if we are multiprocessing
    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }

    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = TransitVirtualPathBuilder(network_los)

    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)  # (o,d)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    logger.info("{trace_label} added {len(results.columns} columns")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(
                f"trace_od not found origin = {trace_orig}, dest = {trace_dest}"
            )
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")
Example #15
def cdap_simulate(persons_merged, persons, households,
                  cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions,
                  chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    trace_label = 'cdap'
    model_settings = config.read_model_settings('cdap.yaml')

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    cdap_interaction_coefficients = \
        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

    # specs are built just-in-time on demand and cached as injectables
    # prebuilding here allows us to write them to the output directory
    # (also when multiprocessing locutor might not see all household sizes)
    logger.info("Pre-building cdap specs")
    for hhsize in range(2, cdap.MAX_HHSIZE + 1):
        spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True)
        if inject.get_injectable('locutor', False):
            spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize), index=True)

    logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

    choices = cdap.run_cdap(
        persons=persons_merged,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    # - assign results to persons table and annotate
    persons = persons.to_frame()

    choices = choices.reindex(persons.index)
    persons['cdap_activity'] = choices.cdap_activity
    persons['cdap_rank'] = choices.cdap_rank

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    # - annotate households table
    households = households.to_frame()
    expressions.assign_columns(
        df=households,
        model_settings=model_settings.get('annotate_households'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
    pipeline.replace_table("households", households)

    tracing.print_summary('cdap_activity', persons.cdap_activity, value_counts=True)
    logger.info("cdap crosstabs:\n%s" %
                pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))

    if trace_hh_id:

        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
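
# Toy version of the cdap crosstab logged above: person type by chosen
# daily activity pattern, with row/column margins.
import pandas as pd

persons = pd.DataFrame({'ptype': [1, 1, 2],
                        'cdap_activity': ['M', 'N', 'M']})
print(pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))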
Example #16
def run_trip_purpose(
        trips_df,
        chunk_size,
        trace_hh_id,
        trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    For each intermediate stop on a tour (i.e. each trip other than the last trip,
    outbound or inbound), a purpose is assigned based on an observed frequency distribution.

    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    model_settings = config.read_model_settings('trip_purpose.yaml')
    probs_spec = trip_purpose_probs()

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'Work', 'Home'), index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(
            df=trips_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    rows_per_chunk, effective_chunk_size = \
        trip_purpose_rpc(chunk_size, trips_df, probs_spec, trace_label=trace_label)

    for i, num_chunks, trips_chunk in chunk.chunked_choosers(trips_df, rows_per_chunk):

        logger.info("Running chunk %s of %s size %d", i, num_chunks, len(trips_chunk))

        chunk_trace_label = tracing.extend_trace_label(trace_label, 'chunk_%s' % i) \
            if num_chunks > 1 else trace_label

        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)

        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            trace_hh_id,
            trace_label=chunk_trace_label)

        chunk.log_close(chunk_trace_label)

        result_list.append(choices)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
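
# Toy illustration of the trip partitioning in run_trip_purpose: the last
# trip in each direction satisfies trip_num == trip_count, and only the
# remaining intermediate stops go through the probability table.
import pandas as pd

trips_df = pd.DataFrame({'trip_num': [1, 2, 1], 'trip_count': [2, 2, 1]},
                        index=pd.Index([1, 2, 3], name='trip_id'))

last_trip = trips_df.trip_num == trips_df.trip_count
print(trips_df[~last_trip].index.tolist())  # [1] - the only intermediate stop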
Example #17
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
Example #18
def run_destination_simulate(
        spec_segment_name,
        tours,
        persons_merged,
        destination_sample,
        model_settings,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    model_spec_file_name = model_settings['SPEC']
    model_spec = simulate.read_model_spec(file_name=model_spec_file_name)
    model_spec = model_spec[[spec_segment_name]]

    # merge persons into tours
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id', right_index=True, how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term, destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        destination_sample,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='destination')

    return choices
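
# Hedged stand-in for the reindex() helper used above: it broadcasts a
# zone-indexed size term onto each sampled alternative row by destination
# zone id (plain pandas in place of activitysim.core.util.reindex).
import pandas as pd

size_term = pd.Series([100.0, 50.0], index=pd.Index([5, 9], name='TAZ'))
alt_dest = pd.Series([9, 5, 9], index=pd.Index([1, 1, 2], name='tour_id'))

broadcast = pd.Series(size_term.loc[alt_dest.values].values, index=alt_dest.index)
print(broadcast.tolist())  # [50.0, 100.0, 50.0]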
Example #19
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to tours.

    creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()

    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logger.info("%s running segment %s with %s chooser rows" %
                    (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        # annotations frame only exists if a preprocessor ran
        if preprocessor_settings:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
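
# Quick check of the docstring arithmetic: an alternative like '2out_0in'
# means two outbound and zero inbound intermediate stops, i.e. stops + 1
# trips per direction (toy parsing, not the real alternatives table).
import re

alt = '2out_0in'
out_stops, in_stops = map(int, re.match(r'(\d+)out_(\d+)in', alt).groups())
num_trips = (out_stops + 1) + (in_stops + 1)
print(num_trips)  # 4: three outbound trips and one inbound trip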
Example #20
def atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    trace_label = 'atwork_subtour_destination_simulate'

    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination.csv')

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged,
                        left_on='person_id', right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge destination_size_terms columns into alt sample list
    alternatives = \
        pd.merge(destination_sample, destination_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')

    return choices
Example #21
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings_file_name = 'non_mandatory_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours",
                len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged =\
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=non_mandatory_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation(
        'non_mandatory_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    # - non_mandatory tour scheduling is not segmented by tour type
    spec_info = {'spec': model_spec, 'estimator': estimator}

    choices = vectorize_tour_scheduling(non_mandatory_tours,
                                        persons_merged,
                                        tdd_alts,
                                        timetable,
                                        tour_segments=spec_info,
                                        tour_segment_col=None,
                                        model_settings=model_settings,
                                        chunk_size=chunk_size,
                                        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in non_mandatory_tours.groupby('tour_num',
                                                               sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
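
# The tdd merge at the end of non_mandatory_tour_scheduling maps alternative
# ids back to their start/end/duration columns. A minimal sketch with a toy
# tdd_alts table (hypothetical values, same join as above):

import pandas as pd

tdd_alts = pd.DataFrame({'start': [5, 5, 6], 'end': [10, 12, 9]})
tdd_alts['duration'] = tdd_alts.end - tdd_alts.start

# model choices are tdd alternative ids, indexed by tour_id
choices = pd.Series([2, 0, 1], index=pd.Index([101, 102, 103], name='tour_id'))

# merging on the alternative id recovers start/end/duration per tour
scheduled = pd.merge(choices.to_frame('tdd'), tdd_alts,
                     left_on='tdd', right_index=True, how='left')
print(scheduled)
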
def transit_pass_subsidy(persons_merged, persons, chunk_size, trace_hh_id):
    """
    Transit pass subsidy model.
    """

    trace_label = 'transit_pass_subsidy'
    model_settings_file_name = 'transit_pass_subsidy.yaml'

    choosers = persons_merged.to_frame()
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('transit_pass_subsidy')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='transit_pass_subsidy',
        estimator=estimator)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'transit_pass_subsidy')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['transit_pass_subsidy'] = choices.reindex(persons.index)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('transit_pass_subsidy',
                          persons.transit_pass_subsidy,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
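
# transit_pass_subsidy runs on persons_merged but writes back to the full
# persons table; reindex aligns the subset of choices onto persons.index,
# leaving NaN for anyone the model skipped. A toy illustration of that
# alignment:

import pandas as pd

persons = pd.DataFrame(index=pd.Index([1, 2, 3, 4], name='person_id'))

# the model only ran for persons 1 and 3
choices = pd.Series([2, 0], index=pd.Index([1, 3], name='person_id'))

persons['transit_pass_subsidy'] = choices.reindex(persons.index)
print(persons)  # persons 2 and 4 get NaN
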
Example #23
0
def _schedule_tours(tours, persons_merged, alts, spec, logsum_tour_purpose,
                    model_settings, skims, timetable, window_id_col,
                    previous_tour, tour_owner_id_col, estimator,
                    tour_trace_label):
    """
    previous_tour stores values used to add columns that can be used in the spec
    which have to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule with unique timetable window_id_col
    persons_merged : DataFrame
        DataFrame of persons to be merged with tours containing attributes referenced
        by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent all possible time slots.
        tdd_interaction_dataset function will use timetable to filter them to omit
        unavailable alternatives
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
    model_settings : dict
    timetable : TimeTable
        timetable of timewidows for person (or subtour) with rows for tours[window_id_col]
    window_id_col : str
        column name from tours that identifies timetable owner (or None if tours index)
        - person_id for non/mandatory tours
        - parent_tour_id for subtours,
        - None (tours index) for joint_tours since every tour may have different participants)
    previous_tour : Series
        series with value of the tdd_alt choice for the most recent previous tour
        scheduled for each tour owner
    tour_owner_id_col : str
        column name from tours that identifies 'owner' of this tour
        (person_id for non/mandatory tours, parent_tour_id for subtours,
        household_id for joint_tours)
    tour_trace_label : str
        label for tracing and logging

    Returns
    -------
    choices : Series
        tdd alternative id chosen for each tour, indexed like tours
    """

    logger.info("%s schedule_tours running %d tour choices" %
                (tour_trace_label, len(tours)))

    # merge persons into tours
    # avoid dual suffix for redundant columns names (e.g. household_id) that appear in both
    tours = pd.merge(tours,
                     persons_merged,
                     left_on='person_id',
                     right_index=True,
                     suffixes=('', '_y'))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - add explicit window_id_col for timetable owner if it is index
    # if no timetable window_id_col specified, then add index as an explicit column
    # (this is not strictly necessary but its presence makes code simpler in several places)
    if window_id_col is None:
        window_id_col = tours.index.name
        tours[window_id_col] = tours.index

    # timetable can't handle multiple tours per window_id
    assert not tours[window_id_col].duplicated().any()

    # - build interaction dataset filtered to include only available tdd alts
    # dataframe columns start, end, duration, person_id, tdd
    # indexed (not unique) on tour_id
    choice_column = TDD_CHOICE_COLUMN
    alt_tdd = tdd_interaction_dataset(tours, alts, timetable, choice_column,
                                      window_id_col, tour_trace_label)
    print(f"tours {tours.shape} alts {alts.shape}")
    chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    # - add logsums
    if logsum_tour_purpose:
        logsums = \
            compute_logsums(alt_tdd, tours, logsum_tour_purpose, model_settings, skims, tour_trace_label)
    else:
        logsums = 0
    alt_tdd['mode_choice_logsum'] = logsums

    # - merge in previous tour columns
    # adds start_previous and end_previous, joins on index
    tours = \
        tours.join(get_previous_tour_by_tourid(tours[tour_owner_id_col], previous_tour, alts))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - make choices
    locals_d = {'tt': timetable}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    preprocessor_settings = model_settings.get('ALTS_PREPROCESSOR', None)

    if preprocessor_settings and preprocessor_settings.get(
            logsum_tour_purpose):
        expressions.assign_columns(
            df=alt_tdd,
            model_settings=preprocessor_settings.get(logsum_tour_purpose),
            locals_dict=locals_d,
            trace_label=tour_trace_label)

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(tours)
        estimator.set_alt_id(choice_column)
        estimator.write_interaction_sample_alternatives(alt_tdd)

    choices = interaction_sample_simulate(tours,
                                          alt_tdd,
                                          spec,
                                          choice_column=choice_column,
                                          locals_d=locals_d,
                                          chunk_size=0,
                                          trace_label=tour_trace_label,
                                          estimator=estimator)

    # - update previous_tour and timetable parameters

    # update previous_tour (series with most recent previous tdd choices) with latest values
    previous_tour.loc[tours[tour_owner_id_col]] = choices.values

    # update timetable with chosen tdd footprints
    timetable.assign(tours[window_id_col], choices)

    return choices
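
# The "_previous" columns described in the _schedule_tours docstring come from
# looking up each owner's most recent tdd choice and suffixing that
# alternative's columns. A standalone sketch of the same idea (a toy stand-in
# for get_previous_tour_by_tourid, not the actual helper):

import pandas as pd

alts = pd.DataFrame({'start': [5, 6, 7], 'end': [9, 10, 11]})

# most recent tdd choice per tour owner, as maintained in previous_tour above
previous_tour = pd.Series({1: 2, 2: 0})

# owner of each tour being scheduled now, indexed by tour_id
tour_owner_ids = pd.Series([1, 2], index=pd.Index([201, 202], name='tour_id'))

prev_tdd = previous_tour.loc[tour_owner_ids.values].values
prev_cols = alts.loc[prev_tdd].add_suffix('_previous')
prev_cols.index = tour_owner_ids.index
print(prev_cols)  # start_previous / end_previous per tour
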
def atwork_subtour_destination_simulate(subtours, persons_merged,
                                        destination_sample, want_logsums,
                                        model_settings, skim_dict,
                                        destination_size_terms, estimator,
                                        chunk_size, trace_label):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge destination_size_terms columns into alt sample list
    alternatives = \
        pd.merge(destination_sample, destination_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location',
        estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    return choices
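
# The want_logsums branch above normalizes the return type:
# interaction_sample_simulate hands back a Series when logsums aren't
# requested, and callers always want a DataFrame with a canonical 'choice'
# column. A minimal sketch of that contract (hypothetical normalize_choices
# helper, not part of the library):

import pandas as pd

def normalize_choices(choices, want_logsums):
    if not want_logsums:
        # a bare Series of choices; wrap it under the canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')
    return choices

print(normalize_choices(pd.Series([5, 9]), want_logsums=False))
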
def joint_tour_composition(tours, households, persons, chunk_size,
                           trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).
    """
    trace_label = 'joint_tour_composition'
    model_settings_file_name = 'joint_tour_composition.yaml'

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('joint_tour_composition')

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" %
                joint_tours.shape[0])

    # - run preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    joint_tours_merged = pd.merge(joint_tours,
                                  households,
                                  left_on='household_id',
                                  right_index=True,
                                  how='left')

    # - simple_simulate
    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(joint_tours_merged)

    choices = simulate.simple_simulate(choosers=joint_tours_merged,
                                       spec=model_spec,
                                       nest_spec=nest_spec,
                                       locals_d=constants,
                                       chunk_size=chunk_size,
                                       trace_label=trace_label,
                                       trace_choice_name='composition',
                                       estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'composition')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition',
                          joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
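
# simple_simulate returns positional alternative indexes; joint_tour_composition
# converts them to names by indexing the spec's columns. A toy version of that
# conversion (made-up alternative names):

import pandas as pd

model_spec = pd.DataFrame(columns=['adults', 'children', 'mixed'])
choices = pd.Series([0, 2, 2], index=pd.Index([11, 12, 13], name='tour_id'))

# column labels indexed by the chosen positions, keeping the original index
labels = pd.Series(model_spec.columns[choices.values], index=choices.index)
print(labels)
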
def non_mandatory_tour_frequency(persons_merged,
                                 non_mandatory_tour_frequency_alts,
                                 non_mandatory_tour_frequency_spec,
                                 non_mandatory_tour_frequency_settings,
                                 chunk_size,
                                 trace_hh_id):

    """
    This model predicts the frequency of making non-mandatory tours
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these tours include escort, shopping, othmaint,
    othdiscr, eatout, and social tours in various combinations.
    """

    t0 = print_elapsed_time()

    choosers = persons_merged.to_frame()

    non_mandatory_tour_frequency_alts['tot_tours'] = non_mandatory_tour_frequency_alts.sum(axis=1)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons" % len(choosers))

    constants = config.get_model_constants(non_mandatory_tour_frequency_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for name, segment in choosers.groupby('ptype_cat'):

        logger.info("Running segment '%s' of size %d" % (name, len(segment)))

        choices = interaction_simulate(
            segment,
            non_mandatory_tour_frequency_alts,
            # notice that we pick the column for the segment for each segment we run
            spec=non_mandatory_tour_frequency_spec[[name]],
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        t0 = print_elapsed_time("non_mandatory_tour_frequency.%s" % name, t0, debug=True)

        # FIXME - force garbage collection
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('non_mandatory_tour_frequency', choices, value_counts=True)

    # FIXME - no need to reindex?
    # FIXME - how about the persons not processed
    inject.add_column("persons", "non_mandatory_tour_frequency", choices)

    create_non_mandatory_tours(trace_hh_id)

    # add non_mandatory_tour-dependent columns (e.g. tour counts) to persons
    pipeline.add_dependent_columns("persons", "persons_nmtf")

    if trace_hh_id:
        trace_columns = ['non_mandatory_tour_frequency']
        tracing.trace_df(inject.get_table('persons').to_frame(),
                         label="non_mandatory_tour_frequency.persons",
                         # columns=trace_columns,
                         warn_if_empty=True)
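
# non_mandatory_tour_frequency segments choosers by person type and selects the
# matching spec column per segment. A small sketch of that groupby-and-slice
# pattern (toy person types and spec values):

import pandas as pd

spec = pd.DataFrame({'PTYPE_FULL': [1.0, 0.5], 'PTYPE_PART': [0.8, 0.2]},
                    index=['expr_a', 'expr_b'])
choosers = pd.DataFrame({'ptype_cat': ['PTYPE_FULL', 'PTYPE_PART', 'PTYPE_FULL']})

for name, segment in choosers.groupby('ptype_cat'):
    # a one-column spec for this segment, as in spec[[name]] above
    segment_spec = spec[[name]]
    print(name, len(segment), list(segment_spec.columns))
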
Example #27
0
def tour_mode_choice_simulate(tours, persons_merged,
                              skim_dict, skim_stack,
                              chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type, value_counts=True)

    primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id',
                                    right_index=True, how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices_list = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        choices = run_tour_mode_choice_simulate(
            segment,
            spec, tour_type, model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    # uncomment to save annotations to table
    # assign_in_place(all_tours, annotations)
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
Example #28
0
def tour_mode_choice_simulate(tours, persons_merged, skim_dict, skim_stack,
                              chunk_size, trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'  # FIXME - should be passed in?

    primary_tours = tours.to_frame()
    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" %
                (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type,
                          value_counts=True)

    primary_tours_merged = pd.merge(primary_tours,
                                    persons_merged,
                                    left_on='person_id',
                                    right_index=True,
                                    how='left',
                                    suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='in_period')
    odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='in_period')
    dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    estimator = estimation.manager.begin_estimation('tour_mode_choice')
    if estimator:
        estimator.write_coefficients(
            simulate.read_model_coefficients(model_settings))
        estimator.write_coefficients_template(
            simulate.read_model_coefficient_template(model_settings))
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # FIXME run_tour_mode_choice_simulate writes choosers post-annotation

    choices_list = []
    primary_tours_merged['primary_purpose'] = \
        primary_tours_merged.tour_type.where((primary_tours_merged.tour_type != 'school') |
                                             ~primary_tours_merged.is_university, 'univ')

    for primary_purpose, tours_segment in primary_tours_merged.groupby(
            'primary_purpose'):

        logger.info(
            "tour_mode_choice_simulate primary_purpose '%s' (%s tours)" % (
                primary_purpose,
                len(tours_segment.index),
            ))

        # name index so tracing knows how to slice
        assert tours_segment.index.name == 'tour_id'

        choices_df = run_tour_mode_choice_simulate(
            tours_segment,
            primary_purpose,
            model_settings,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            skims=skims,
            constants=constants,
            estimator=estimator,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label,
                                                   primary_purpose),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices_df' %
                              primary_purpose,
                              choices_df.tour_mode,
                              value_counts=True)

        choices_list.append(choices_df)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    if estimator:
        estimator.write_choices(choices_df.tour_mode)
        choices_df.tour_mode = estimator.get_survey_values(
            choices_df.tour_mode, 'tours', 'tour_mode')
        estimator.write_override_choices(choices_df.tour_mode)
        estimator.end_estimation()

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices_df.tour_mode,
                          value_counts=True)

    # so we can trace with annotations
    assign_in_place(primary_tours, choices_df)

    # but only keep mode choice col
    all_tours = tours.to_frame()
    assign_in_place(all_tours, choices_df)

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(
                             trace_label, mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
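
# The primary_purpose derivation above relies on Series.where: values are kept
# where the condition holds and replaced elsewhere, so only school tours made
# by university students become 'univ'. A toy demonstration:

import pandas as pd

tours = pd.DataFrame({
    'tour_type': ['work', 'school', 'school', 'shopping'],
    'is_university': [False, False, True, False],
})

tours['primary_purpose'] = tours.tour_type.where(
    (tours.tour_type != 'school') | ~tours.is_university, 'univ')
print(tours)  # only row 2 becomes 'univ'
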
Example #29
0
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers%s alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))

    # - setup skims

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(
        tour_purpose)

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='in_period')
    odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='in_period')
    dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # - compute logsums

    coefficients = simulate.get_segment_coefficients(logsum_settings,
                                                     tour_purpose)
    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec,
                                             coefficients,
                                             estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients)

    # constrained coefficients can appear in expressions
    locals_dict.update(coefficients)

    logsums = simulate.simple_simulate_logsums(choosers,
                                               logsum_spec,
                                               nest_spec,
                                               skims=skims,
                                               locals_d=locals_dict,
                                               chunk_size=0,
                                               trace_label=trace_label)

    return logsums
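
# _compute_logsums layers its evaluation context with successive dict.update
# calls, so later entries shadow earlier ones: constants first, then skims,
# then coefficients. A plain-Python illustration of that shadowing (made-up
# names and values):

constants = {'WALK_SPEED': 3.0, 'coef_ivt': -0.030}
skims = {'odt_skims': '<wrapper>', 'od_skims': '<wrapper>'}
coefficients = {'coef_ivt': -0.028}  # constrained coefficient, same name

locals_dict = {}
locals_dict.update(constants)     # model constants first
locals_dict.update(skims)         # then skim wrappers
locals_dict.update(coefficients)  # coefficients last, overriding any clash

print(locals_dict['coef_ivt'])  # -0.028: the coefficient wins
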
Example #30
0
def run_location_simulate(segment_name, persons_merged, location_sample_df,
                          network_los, dest_size_terms, want_logsums,
                          estimator, model_settings, chunk_size, chunk_tag,
                          trace_label):
    """
    run location model on location_sample annotated with mode_choice logsum
    to select a dest zone from sample alternatives

    Returns
    -------
    choices : pandas.DataFrame indexed by persons_merged_df.index
        choice : location choices (zone ids)
        logsum : float logsum of choice utilities across alternatives

    logsums optional & only returned if DEST_CHOICE_LOGSUM_COLUMN_NAME specified in model_settings
    """
    assert not persons_merged.empty

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = persons_merged[chooser_columns]

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    alternatives = \
        pd.merge(location_sample_df, dest_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    logger.info("Running %s with %d persons" % (trace_label, len(choosers)))

    # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers
    # and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap('home_zone_id', alt_dest_col_name)

    locals_d = {'skims': skims, 'segment_size': segment_name}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(choosers)
        estimator.set_alt_id(alt_dest_col_name)
        estimator.write_interaction_sample_alternatives(alternatives)

    spec = simulate.spec_for_segment(model_settings,
                                     spec_id='SPEC',
                                     segment_name=segment_name,
                                     estimator=estimator)

    log_alt_losers = config.setting('log_alt_losers', False)

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=spec,
        choice_column=alt_dest_col_name,
        log_alt_losers=log_alt_losers,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label,
        trace_choice_name=model_settings['DEST_CHOICE_COLUMN_NAME'],
        estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    assert isinstance(choices, pd.DataFrame)

    return choices
Example #31
0
def demographics_processor(persons, persons_merged, demographics_spec,
                           demographics_settings, chunk_size, trace_hh_id):

    # the choice model will be applied to each row of the choosers table (a pandas.DataFrame)
    persons_df = persons_merged.to_frame()

    logger.info(
        "Running demographics_processor with %d persons (chunk size = %s)" %
        (len(persons_df), chunk_size))

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(demographics_settings)
    locals_dict.update(config.setting('globals'))

    trace_rows = trace_hh_id and persons_df['household_id'] == trace_hh_id

    # eval_variables evaluates each of the expressions in spec
    # in the context of each row of the choosers dataframe
    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(demographics_spec,
                                  persons_df,
                                  locals_dict,
                                  df_alias='persons',
                                  trace_rows=trace_rows)

    # add assigned columns to persons as they are needed by downstream processors
    persons = persons.to_frame()
    assign_in_place(persons, results)
    pipeline.replace_table("persons", persons)

    # coc groups with counts
    # TODO - should we allow specifying which assigned columns are coc (e.g. in settings?)
    # for now, assume all assigned columns are coc, but this could cramp modelers' style
    # if they want to create additional demographic columns for downstream use that aren't coc
    coc_columns = list(results.columns)

    inject.add_injectable("coc_column_names", coc_columns)

    # - create table with coc columns as indexes and a single column 'persons' with counts
    # index                        persons
    # coc_poverty coc_age
    # False       False            20
    #             True              3
    # True        False             4
    coc_grouped = results.groupby(coc_columns)
    coc_grouped = coc_grouped[coc_columns[0]].count().to_frame(name='persons')

    pipeline.replace_table("coc_results", coc_grouped)

    add_summary_results(coc_grouped)

    if trace_hh_id:

        if trace_results is not None:

            tracing.write_csv(trace_results,
                              file_name="demographics",
                              index_label='person_idx',
                              column_labels=['label', 'person'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="demographics_locals")
def atwork_subtour_frequency(tours, persons_merged,
                             atwork_subtour_frequency_spec,
                             atwork_subtour_frequency_settings,
                             atwork_subtour_frequency_alternatives, chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making at-work subtour tours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    work_tours = tours[tours.tour_type == 'work']

    # merge persons into work_tours
    work_tours = pd.merge(work_tours,
                          persons_merged,
                          left_on='person_id',
                          right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours" %
                len(work_tours))

    nest_spec = config.get_logit_model_settings(
        atwork_subtour_frequency_settings)
    constants = config.get_model_constants(atwork_subtour_frequency_settings)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=atwork_subtour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(atwork_subtour_frequency_spec.columns[choices.values],
                        index=choices.index)

    tracing.print_summary('atwork_subtour_frequency',
                          choices,
                          value_counts=True)

    # reindex since we are working with a subset of tours
    choices = choices.reindex(tours.index)

    # add atwork_subtour_frequency column to tours
    tours['atwork_subtour_frequency'] = choices
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours,
                                       atwork_subtour_frequency_alternatives)

    pipeline.extend_table("tours", subtours)
    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel(subtours, 'tours')

    if trace_hh_id:
        trace_columns = ['atwork_subtour_frequency']
        tracing.trace_df(inject.get_table('tours').to_frame(),
                         label=trace_label,
                         columns=trace_columns,
                         warn_if_empty=True)
Example #33
0
def tour_mode_choice_simulate(tours_merged, tour_mode_choice_spec,
                              tour_mode_choice_settings, skim_dict, skim_stack,
                              chunk_size, trace_hh_id):
    """
    Tour mode choice simulate
    """

    trace_label = 'tour_mode_choice'

    tours = tours_merged.to_frame()

    tours = tours[tours.tour_category != 'subtour']

    nest_spec = config.get_logit_model_settings(tour_mode_choice_settings)
    constants = config.get_model_constants(tour_mode_choice_settings)

    logger.info("Running tour_mode_choice_simulate with %d tours" %
                len(tours.index))

    tracing.print_summary('tour_mode_choice_simulate tour_type',
                          tours.tour_type,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tour_mode_choice_spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE',
                         transpose=False)

    # setup skim keys
    odt_skim_stack_wrapper = skim_stack.wrap(left_key='TAZ',
                                             right_key='destination',
                                             skim_key="out_period")
    dot_skim_stack_wrapper = skim_stack.wrap(left_key='destination',
                                             right_key='TAZ',
                                             skim_key="in_period")
    od_skims = skim_dict.wrap('TAZ', 'destination')

    choices_list = []

    for tour_type, segment in tours.groupby('tour_type'):

        # if tour_type != 'work':
        #     continue

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" % (
            tour_type,
            len(segment.index),
        ))

        # name index so tracing knows how to slice
        segment.index.name = 'tour_id'

        spec = get_segment_and_unstack(tour_mode_choice_spec, tour_type)

        if trace_hh_id:
            tracing.trace_df(spec,
                             tracing.extend_trace_label(
                                 trace_label, 'spec.%s' % tour_type),
                             slicer='NONE',
                             transpose=False)

        choices = _mode_choice_simulate(
            segment,
            odt_skim_stack_wrapper=odt_skim_stack_wrapper,
            dot_skim_stack_wrapper=dot_skim_stack_wrapper,
            od_skim_stack_wrapper=od_skims,
            spec=spec,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' %
                              tour_type,
                              choices,
                              value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices,
                          value_counts=True)

    inject.add_column("tours", "mode", choices)

    if trace_hh_id:
        trace_columns = ['mode', 'person_id', 'tour_type', 'tour_num']
        tracing.trace_df(inject.get_table('tours').to_frame(),
                         label=tracing.extend_trace_label(trace_label, 'mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         columns=trace_columns,
                         warn_if_empty=True)
Example #34
0
def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in the trip_mode_choice_coefficients.csv coefficient file).

    Adds a trip_mode column to the trips table
    """

    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    # give trip mode choice the option to run without calling tours_merged. Useful for xborder
    # model where tour_od_choice needs trip mode choice logsums before some of the join keys
    # needed by tours_merged (e.g. home_zone_id) exist
    tours_cols = [
        col for col in model_settings['TOURS_MERGED_CHOOSER_COLUMNS']
        if col not in trips_df.columns
    ]
    if len(tours_cols) > 0:
        tours_merged = inject.get_table('tours_merged').to_frame(
            columns=tours_cols)
    else:
        tours_merged = pd.DataFrame()

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips_df,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    assert trips_merged.index.equals(trips.index)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(
        trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'
    min_per_period = network_los.skim_time_periods['period_minutes']
    periods_per_hour = 60 / min_per_period

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col,
        'MIN_PER_PERIOD': min_per_period,
        'PERIODS_PER_HOUR': periods_per_hour
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col,
                                               dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col,
                                               dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb
        tvpb_recipe = model_settings.get('TVPB_recipe', 'tour_mode_choice')
        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col,
                                           dest_key=dest_col,
                                           tod_key='trip_period',
                                           segment_key='demographic_segment',
                                           recipe=tvpb_recipe,
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
        })

        # This if-clause gives the user the option of NOT inheriting constants
        # from the tvpb settings. previously, these constants were inherited
        # automatically, which had the undesirable effect of overwriting any
        # trip mode choice model constants/coefficients that shared the same
        # name. The default behavior is still the same (True), but the user
        # can now avoid any chance of squashing these local variables by
        # adding `use_TVPB_constants: False` to the trip_mode_choice.yaml file.
        # the tvpb will still use the constants as defined in the recipe
        # specified above in `tvpb.wrap_logsum()` but they will not be used
        # in the trip mode choice expressions.
        if model_settings.get('use_TVPB_constants', True):
            constants.update(
                network_los.setting(
                    'TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    # don't create estimation data bundle if trip mode choice is being called
    # from another model step (e.g. tour mode choice logsum creation)
    if pipeline._PIPELINE.rng().step_name != 'trip_mode_choice':
        estimator = None
    else:
        estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby(
            'primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(
            trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" % (
            primary_purpose,
            len(trips_segment.index),
        ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(
            model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)

        constants_keys = constants.keys()
        if any([coeff in constants_keys for coeff in coefficients.keys()]):
            logger.warning(
                "coefficients are obscuring constants in locals_dict")
        locals_dict.update(coefficients)

        # have to initialize chunker for preprocessing in order to access
        # tvpb logsum terms in preprocessor expressions.
        with chunk.chunk_log(tracing.extend_trace_label(
                trace_label, 'preprocessing'),
                             base=True):
            expressions.annotate_preprocessors(trips_segment, locals_dict,
                                               skims, model_settings,
                                               segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients,
                                            estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[
                        dest_col] = np.nan if pd.api.types.is_numeric_dtype(
                            skim_cache[c]) else ''
                choices_df[dest_col].where(
                    choices_df[mode_column_name] != mode,
                    skim_cache[c],
                    inplace=True)

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(
            choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode,
                          value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name],
                          value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(
                             trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
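
A note on the Series.where back-fill used for the THREE_ZONE columns above:
s.where(cond, other) keeps s where cond is True and takes other where it is
False, so only rows whose chosen mode matches the tvpb mode receive the cached
tap values. A minimal sketch of that semantics (column and mode names are
illustrative, not the model's actual skim cache):

import pandas as pd

choices_df = pd.DataFrame({'trip_mode': ['WALK_TRANSIT', 'DRIVE', 'WALK_TRANSIT']})
cached = pd.Series([11, 22, 33])  # cached tap ids, aligned with choices_df rows

choices_df['od_btap'] = ''  # start empty, as in the model code above
# keep '' where the mode does not match; take the cached value where it does
choices_df['od_btap'] = choices_df['od_btap'].where(
    choices_df['trip_mode'] != 'WALK_TRANSIT', cached)
# rows 0 and 2 now hold 11 and 33; the DRIVE row keeps ''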
Example #35
0
def run_destination_simulate(spec_segment_name, tours, persons_merged,
                             destination_sample, want_logsums, model_settings,
                             skim_dict, destination_size_terms, estimator,
                             chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    spec = simulate.spec_for_segment(model_settings,
                                     spec_id='SPEC',
                                     segment_name=spec_segment_name,
                                     estimator=estimator)

    # merge persons into tours
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]
    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term, destination_sample[alt_dest_col_name])

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample_simulate(choosers,
                                          destination_sample,
                                          spec=spec,
                                          choice_column=alt_dest_col_name,
                                          want_logsums=want_logsums,
                                          skims=skims,
                                          locals_d=locals_d,
                                          chunk_size=chunk_size,
                                          trace_label=trace_label,
                                          trace_choice_name='destination',
                                          estimator=estimator)

    return choices
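
The reindex call above is doing a vectorized lookup: indexing the size_term
series (indexed by zone) with each sampled alternative's destination zone
returns one size term per sample row. A small sketch of the underlying pandas
operation, with made-up zone ids:

import pandas as pd

size_term = pd.Series([1.5, 0.2, 3.0], index=[10, 20, 30])  # indexed by zone id
alt_dest = pd.Series([30, 10, 30, 20])                      # one row per sampled alt

# values come back in alt_dest order (NaN for any zone not in size_term)
looked_up = size_term.reindex(alt_dest.values)
print(list(looked_up))  # [3.0, 1.5, 3.0, 0.2]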
Example #36
0
def physical_activity_processor(trips_with_demographics, persons_merged,
                                physical_activity_trip_spec,
                                physical_activity_person_spec,
                                physical_activity_settings, coc_column_names,
                                settings, chunk_size, trace_hh_id):
    """
    Compute physical activity benefits

    Physical activity benefits generally accrue if the net physical activity for an individual
    exceeds a certain threshold. We calculate individual physical activity based on trips,
    so we need to compute trip activity and then sum up to the person level to calculate benefits.
    We chunk trips by household id to ensure that all of a person's trips are in the same chunk.
    """

    trips_df = trips_with_demographics.to_frame()
    persons_df = persons_merged.to_frame()
    trace_label = 'physical_activity'

    logger.info(
        "Running physical_activity_processor with %d trips for %d persons " %
        (len(trips_df), len(persons_df)))

    locals_dict = config.get_model_constants(physical_activity_settings)
    locals_dict.update(config.setting('globals'))

    trip_trace_rows = trace_hh_id and trips_df.household_id == trace_hh_id

    rows_per_chunk, effective_chunk_size = \
        physical_activity_rpc(chunk_size, trips_df, persons_df,
                              physical_activity_trip_spec, trace_label)

    logger.info("physical_activity_processor chunk_size %s rows_per_chunk %s" %
                (chunk_size, rows_per_chunk))

    coc_summary = None
    result_list = []

    # iterate over trips df chunked by hh_id
    for i, num_chunks, trips_chunk, trace_rows_chunk \
            in bca.chunked_df_by_chunk_id(trips_df, trip_trace_rows, rows_per_chunk):

        logger.info("%s chunk %s of %s" % (trace_label, i, num_chunks))

        trip_activity, trip_trace_results, trip_trace_assigned_locals = \
            assign.assign_variables(physical_activity_trip_spec,
                                    trips_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='trips',
                                    trace_rows=trace_rows_chunk)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks
        if trip_trace_results is not None:
            tracing.write_csv(trip_trace_results,
                              file_name="physical_activity_trips",
                              index_label='trip_id',
                              column_labels=['label', 'trip'])

            if trip_trace_assigned_locals:
                tracing.write_csv(trip_trace_assigned_locals,
                                  file_name="physical_activity_trips_locals")

        # sum trip activity for each unique person
        trip_activity = trip_activity.groupby(trips_chunk.person_id).sum()

        # merge in persons columns for this chunk
        persons_chunk = pd.merge(trip_activity,
                                 persons_df,
                                 left_index=True,
                                 right_index=True)

        # trace rows array for this chunk
        person_trace_rows = trace_hh_id and (persons_chunk['household_id'] == trace_hh_id)

        person_activity, person_trace_results, person_trace_assigned_locals = \
            assign.assign_variables(physical_activity_person_spec,
                                    persons_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='persons',
                                    trace_rows=person_trace_rows)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks
        if person_trace_results is not None:
            tracing.write_csv(person_trace_results,
                              file_name="physical_activity_persons",
                              index_label='persons_merged_table_index',
                              column_labels=['label', 'person'])

            if person_trace_assigned_locals:
                tracing.write_csv(person_trace_assigned_locals,
                                  file_name="physical_activity_persons_locals")

        # concat in the coc columns and summarize the chunk by coc
        person_activity = pd.concat(
            [persons_chunk[coc_column_names], person_activity], axis=1)
        coc_summary = person_activity.groupby(coc_column_names).sum()

        result_list.append(coc_summary)

        chunk_trace_label = '%s chunk_%s' % (trace_label, i)
        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)
        chunk.log_df(chunk_trace_label, 'trips_chunk', trips_chunk)
        chunk.log_df(chunk_trace_label, 'persons_chunk', persons_chunk)
        chunk.log_close(chunk_trace_label)

    if len(result_list) > 1:

        # (if there was only one chunk, then concat is redundant)
        coc_summary = pd.concat(result_list)

        # squash the accumulated chunk summaries by reapplying group and sum
        coc_summary.reset_index(inplace=True)
        coc_summary = coc_summary.groupby(coc_column_names).sum()

    result_prefix = 'PA_'
    add_result_columns("coc_results", coc_summary, result_prefix)
    add_summary_results(coc_summary,
                        prefix=result_prefix,
                        spec=physical_activity_person_spec)
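
The final squash step works because group sums are additive across chunks:
concatenating the per-chunk groupby sums and grouping again yields the same
totals as a single pass over all trips. A quick sketch:

import pandas as pd

df = pd.DataFrame({'coc': ['a', 'a', 'b', 'b'], 'activity': [1, 2, 3, 4]})
chunks = [df.iloc[:1], df.iloc[1:]]  # group 'a' spans both chunks

partial = [c.groupby('coc').sum() for c in chunks]
squashed = pd.concat(partial).reset_index().groupby('coc').sum()

assert squashed.equals(df.groupby('coc').sum())  # a: 3, b: 7 either way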
Example #37
0
def stop_frequency(tours, tours_merged, stop_frequency_alts, network_los,
                   chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound and outbound stops,
    and create a trip table with the corresponding inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to the tours table.

    Creates a trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings_file_name = 'stop_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()
    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        assert 'origin' in tours_merged
        assert 'destination' in tours_merged
        od_skim_stack_wrapper = network_los.get_default_skim_dict().wrap(
            'origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            'network_los': network_los
        }
        locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose,
                          value_counts=True)

    spec_segments = model_settings.get('SPEC_SEGMENTS')
    assert spec_segments is not None, f"SPEC_SEGMENTS setting not found in model settings: {model_settings_file_name}"
    segment_col = model_settings.get('SEGMENT_COL')
    assert segment_col is not None, f"SEGMENT_COL setting not found in model settings: {model_settings_file_name}"

    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings[segment_col]
        segment_value = segment_settings[segment_col]

        chooser_segment = tours_merged[tours_merged[segment_col] ==
                                       segment_value]

        if len(chooser_segment) == 0:
            logger.info(
                f"{trace_label} skipping empty segment {segment_name}")
            continue

        logger.info(
            f"{trace_label} running segment {segment_name} with {chooser_segment.shape[0]} chooser rows"
        )

        estimator = estimation.manager.begin_estimation(
            model_name=segment_name, bundle_name='stop_frequency')

        segment_spec = simulate.read_model_spec(
            file_name=segment_settings['SPEC'])
        assert segment_spec is not None, "spec for segment_type %s not found" % segment_name

        coefficients_file_name = segment_settings['COEFFICIENTS']
        coefficients_df = simulate.read_model_coefficients(
            file_name=coefficients_file_name)
        segment_spec = simulate.eval_coefficients(segment_spec,
                                                  coefficients_df, estimator)

        if estimator:
            estimator.write_spec(segment_settings, bundle_directory=False)
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name,
                                           bundle_directory=True)
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = simulate.simple_simulate(
            choosers=chooser_segment,
            spec=segment_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_name),
            trace_choice_name='stops',
            estimator=estimator)

        # convert indexes to alternative names
        choices = pd.Series(segment_spec.columns[choices.values],
                            index=choices.index)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'tours', 'stop_frequency')  # override choices
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    # FIXME should have added this when tours created?
    # primary_purpose is not in tours yet - copy it over from tours_merged
    # (where it was added by the annotate tours preprocessor)
    assert 'primary_purpose' not in tours
    assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if estimator:
        # make sure they created trips with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_id', 'outbound']

        survey_trips = estimation.manager.get_survey_table(table_name='trips')
        different = False
        survey_trips_not_in_trips = \
            survey_trips[~survey_trips.index.isin(trips.index)]
        if len(survey_trips_not_in_trips) > 0:
            print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}")
            different = True
        trips_not_in_survey_trips = \
            trips[~trips.index.isin(survey_trips.index)]
        if len(trips_not_in_survey_trips) > 0:
            print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}")
            different = True
        assert not different

        survey_trips = \
            estimation.manager.get_survey_values(trips,
                                                 table_name='trips',
                                                 column_names=columns)

        trips_differ = (trips[columns] != survey_trips[columns]).any(axis=1)

        if trips_differ.any():
            print("trips_differ\n%s" % trips_differ)
            print("%s of %s tours differ" %
                  (trips_differ.sum(), len(trips_differ)))
            print("differing survey_trips\n%s" % survey_trips[trips_differ])
            print("differing modeled_trips\n%s" % trips[columns][trips_differ])

        assert (not trips_differ.any())

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(annotations,
                         label="stop_frequency.annotations",
                         columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
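
simple_simulate returns the positional index of the chosen spec column, which
is why the code above maps choices through segment_spec.columns to recover
alternative names. A toy sketch of that conversion:

import pandas as pd

spec_columns = pd.Index(['0out_0in', '1out_0in', '0out_1in'])
choices = pd.Series([2, 0, 1], index=[101, 102, 103])  # positional choice per tour

labels = pd.Series(spec_columns[choices.values], index=choices.index)
print(list(labels))  # ['0out_1in', '0out_0in', '1out_0in']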
Example #38
0
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings = config.read_model_settings('joint_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_joint.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts,
        spec=model_spec,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
Example #39
0
def compute_accessibility(land_use, accessibility, network_los, chunk_size,
                          trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(
        config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()
    if len(accessibility_df.columns) > 0:
        logger.warning(
            f"accessibility table is not empty. Columns:{list(accessibility_df.columns)}"
        )
        raise RuntimeError(f"accessibility table is not empty.")

    constants = config.get_model_constants(model_settings)

    # only include the land_use columns needed by spec, as specified by land_use_columns model_setting
    land_use_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()
    land_use_df = land_use_df[land_use_columns]

    logger.info(
        f"Running {trace_label} with {len(accessibility_df.index)} orig zones {len(land_use_df)} dest zones"
    )

    row_size = \
        chunk_size and accessibility_calc_row_size(accessibility_df, land_use_df,
                                                   assignment_spec, network_los, trace_label)

    accessibilities_list = []

    for i, chooser_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(accessibility_df, chunk_size, row_size, trace_label):

        accessibilities = \
            compute_accessibilities_for_zones(chooser_chunk, land_use_df, assignment_spec,
                                              constants, network_los, trace_od, trace_label)
        accessibilities_list.append(accessibilities)

    accessibility_df = pd.concat(accessibilities_list)

    logger.info(
        f"{trace_label} computed accessibilities {accessibility_df.shape}")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
Example #40
0
def free_parking(persons_merged, persons, households, skim_dict, skim_stack,
                 chunk_size, trace_hh_id, locutor):
    """

    """

    trace_label = 'free_parking'
    model_settings_file_name = 'free_parking.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_taz > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('free_parking')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work',
        estimator=estimator)

    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'free_parking_at_work')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['free_parking_at_work'] = choices.reindex(
        persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking',
                          persons.free_parking_at_work,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
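
Because the model only runs for workers (workplace_taz > -1), the choices
series covers a subset of persons; reindex(persons.index).fillna(0).astype(bool)
pads everyone else with False. A minimal sketch of the pattern:

import pandas as pd

persons_index = pd.Index([1, 2, 3, 4], name='person_id')
choices = pd.Series([True, False], index=[2, 4])  # modeled persons only

padded = choices.reindex(persons_index).fillna(0).astype(bool)
print(padded.tolist())  # [False, True, False, False]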
Example #41
0
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     network_los, skims, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers%s alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)
    locals_dict = {}
    locals_dict.update(constants)

    if network_los.zone_system == los.THREE_ZONE:
        # TVPB constants can appear in expressions
        locals_dict.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    locals_dict.update(skims)

    # constrained coefficients can appear in expressions
    coefficients = simulate.get_segment_coefficients(logsum_settings,
                                                     tour_purpose)
    locals_dict.update(coefficients)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # - compute logsums
    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec,
                                             coefficients,
                                             estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients)

    logsums = simulate.simple_simulate_logsums(choosers,
                                               logsum_spec,
                                               nest_spec,
                                               skims=skims,
                                               locals_d=locals_dict,
                                               chunk_size=0,
                                               trace_label=trace_label)

    return logsums
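
For reference, the logsum computed here is the standard MNL
expected-maximum-utility measure, ln(sum_k exp(V_k)) over the alternatives. A
numerically stable sketch for a single chooser:

import numpy as np

utilities = np.array([-1.2, 0.3, -0.5])  # V_k for each alternative

v_max = utilities.max()  # shift before exponentiating, for numerical stability
logsum = v_max + np.log(np.exp(utilities - v_max).sum())
print(round(logsum, 4))  # 0.8143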
Example #42
0
def run_trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id,
        trace_label):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------

    """

    model_settings = config.read_model_settings('trip_destination.yaml')
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(int)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(int)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    if 'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS' in model_settings:
        redundant_cols = model_settings['REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS']
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skims = wrap_skims(model_settings)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by TAZ, purpose
    # e.g. size_terms.get(df.dest_taz, df.purpose)
    # returns a series of size_terms for each chooser's dest_taz and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just TAZ index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST']

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=config.get_model_constants(model_settings),
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    size_term_matrix, skims,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)

            destinations = pd.concat(choices_list)

            failed_trip_ids = nth_trips.index.difference(destinations.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            # - assign choices to these trips destinations and to next trips origin
            assign_in_place(trips, destinations.to_frame('destination'))
            destinations.index = nth_trips.next_trip_id.reindex(destinations.index)
            assign_in_place(trips, destinations.to_frame('origin'))

    del trips['next_trip_id']

    return trips
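
The np.roll trick near the top pairs each trip with its successor: rolling the
sorted index by -1 puts the next trip's id on every row, and the
where(trip_num < trip_count, 0) zeroes it for final trips so destination
choices never chain across tours. A small sketch:

import numpy as np
import pandas as pd

trips = pd.DataFrame(
    {'trip_num': [1, 2, 1, 2], 'trip_count': [2, 2, 2, 2]},
    index=pd.Index([11, 12, 21, 22], name='trip_id')).sort_index()

trips['next_trip_id'] = np.roll(trips.index, -1)
trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)
print(trips.next_trip_id.tolist())  # [12, 0, 22, 0]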
Example #43
0
def joint_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                          trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'

    model_settings_file_name = 'joint_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table(
        'joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label,
                joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=joint_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('joint_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    choices = vectorize_joint_tour_scheduling(joint_tours,
                                              joint_tour_participants,
                                              persons_merged,
                                              tdd_alts,
                                              timetable,
                                              spec=model_spec,
                                              model_settings=model_settings,
                                              estimator=estimator,
                                              chunk_size=chunk_size,
                                              trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):
            nth_participants = \
                joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

            estimator.log(
                "assign timetable for %s participants in %s tours with tour_num %s"
                % (len(nth_participants), len(nth_tours), tour_num))
            # - update timetables of all joint tour participants
            timetable.assign(nth_participants.person_id,
                             reindex(choices, nth_participants.tour_id))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
Example #44
0
def best_transit_path(set_random_seed,
                      network_los,
                      best_transit_path_spec):

    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    omaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    dmaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    tod = np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE)
    od_df = pd.DataFrame({'omaz': omaz, 'dmaz': dmaz, 'tod': tod})

    trace_od = (od_df.omaz[0], od_df.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s" % trace_od)

    # build exploded atap_btap_df

    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    atap_btap_df = network_los.get_tappairs_mazpairs(od_df.omaz, od_df.dmaz,
                                                     ofilter='drive_time',
                                                     dfilter='walk_alightingActual')

    # add in tod column
    atap_btap_df = atap_btap_df.merge(
        right=od_df[['tod']],
        left_on='idx',
        right_index=True,
        how='left'
    )

    logger.info("len od_df %s", len(od_df.index))
    logger.info("len atap_btap_df %s", len(atap_btap_df.index))
    logger.info("avg explosion %s", (len(atap_btap_df.index) / (1.0 * len(od_df.index))))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_oabd_rows = (atap_btap_df.omaz == trace_orig) & (atap_btap_df.dmaz == trace_dest)
    else:
        trace_oabd_rows = None

    constants = config.get_model_constants(model_settings)
    locals_d = {
        'np': np,
        'network_los': network_los
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(best_transit_path_spec, atap_btap_df, locals_d,
                                  trace_rows=trace_oabd_rows)

    # copy results
    for column in results.columns:
        atap_btap_df[column] = results[column]

    # drop rows if no utility
    n = len(atap_btap_df.index)
    atap_btap_df = atap_btap_df.dropna(subset=['utility'])

    logger.info("Dropped %s of %s rows with null utility", n - len(atap_btap_df.index), n)

    # choose max utility
    atap_btap_df = atap_btap_df.sort_values(by='utility').groupby('idx').tail(1)

    if trace_od:

        if not trace_oabd_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s", trace_orig, trace_dest)
        else:

            tracing.trace_df(atap_btap_df,
                             label='best_transit_path',
                             slicer='NONE',
                             transpose=False)

            tracing.trace_df(trace_results,
                             label='trace_best_transit_path',
                             slicer='NONE',
                             transpose=False)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="trace_best_transit_path_locals")
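
The sort-then-tail idiom above selects the highest-utility path per od pair:
after an ascending sort on utility, tail(1) within each idx group keeps only
the last (maximum) row. On sketch data, idxmax picks the same rows:

import pandas as pd

df = pd.DataFrame({'idx': [0, 0, 1, 1], 'utility': [1.0, 3.0, 2.5, 2.0]})

best_tail = df.sort_values(by='utility').groupby('idx').tail(1)
best_idxmax = df.loc[df.groupby('idx')['utility'].idxmax()]

assert sorted(best_tail.utility) == sorted(best_idxmax.utility)  # 2.5 and 3.0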
Example #45
0
def workplace_location_simulate(persons_merged, workplace_location_sample,
                                workplace_location_spec,
                                workplace_location_settings, skim_dict,
                                destination_size_terms, chunk_size,
                                trace_hh_id):
    """
    Workplace location model on workplace_location_sample annotated with mode_choice logsum
    to select a work_taz from sample alternatives
    """

    # for now I'm going to generate a workplace location for everyone -
    # presumably it will not get used in downstream models for everyone -
    # it should depend on CDAP and mandatory tour generation as to whether
    # it gets used
    choosers = persons_merged.to_frame()

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    workplace_location_sample = workplace_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()
    alternatives = \
        pd.merge(workplace_location_sample, destination_size_terms,
                 left_on=alt_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, 'workplace_location_simulate',
                    'alternatives')

    constants = config.get_model_constants(workplace_location_settings)

    sample_pool_size = len(destination_size_terms.index)

    logger.info("Running workplace_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {'skims': skims, 'sample_pool_size': float(sample_pool_size)}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    tracing.dump_df(DUMP, choosers, 'workplace_location_simulate', 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=workplace_location_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_hh_id and 'workplace_location',
        trace_choice_name='workplace_location')

    # FIXME - no need to reindex since we didn't slice choosers
    # choices = choices.reindex(persons_merged.index)

    tracing.print_summary('workplace_taz', choices, describe=True)

    inject.add_column("persons", "workplace_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_workplace")

    if trace_hh_id:
        trace_columns = \
            ['workplace_taz'] + inject.get_table('persons_workplace').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="workplace_location",
                         columns=trace_columns,
                         warn_if_empty=True)
Example #46
0
def run_location_sample(
        segment_name,
        persons_merged,
        skim_dict,
        dest_size_terms,
        model_settings,
        chunk_size, trace_label):
    """
    select a sample of alternative locations.

    Logsum calculations are expensive, so we build a table of persons * all zones
    and then select a sample subset of potential locations

    The sample subset is generated by making multiple choices (<sample_size> number of choices)
    which results in a sample containing up to <sample_size> choices for each chooser (e.g. person)
    and a pick_count indicating how many times that choice was selected for that chooser.

    person_id,  dest_TAZ, rand,            pick_count
    23750,      14,       0.565502716034,  4
    23750,      16,       0.711135838871,  6
    ...
    23751,      12,       0.408038878552,  1
    23751,      14,       0.972732479292,  2
    """
    assert not persons_merged.empty

    model_spec = simulate.read_model_spec(file_name=model_settings['SAMPLE_SPEC'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = persons_merged[chooser_columns]

    alternatives = dest_size_terms

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("Running %s with %d persons" % (trace_label, len(choosers.index)))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('TAZ_chooser', 'TAZ')

    locals_d = {
        'skims': skims,
        'segment_size': segment_name
    }
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=spec_for_segment(model_spec, segment_name),
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    return choices
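
The pick_count column described in the docstring falls out of sampling with
replacement: each chooser draws sample_size alternatives, and repeated draws
of the same zone collapse to one row with a count. A sketch of that collapse
(the draws are made up):

import pandas as pd

draws = pd.DataFrame({'person_id': [23750] * 4 + [23751] * 4,
                      'dest_TAZ': [14, 16, 14, 14, 12, 14, 14, 12]})

sample = (draws.groupby(['person_id', 'dest_TAZ']).size()
          .rename('pick_count').reset_index())
print(sample)
#    person_id  dest_TAZ  pick_count
# 0      23750        14           3
# 1      23750        16           1
# 2      23751        12           2
# 3      23751        14           2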
Example #47
0
def joint_tour_composition(
        tours, households, persons,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).
    """
    trace_label = 'joint_tour_composition'

    model_settings = config.read_model_settings('joint_tour_composition.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_composition.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(trace_label, tours)
        return

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" % joint_tours.shape[0])

    # - run preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(
            df=households,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    joint_tours_merged = pd.merge(joint_tours, households,
                                  left_on='household_id', right_index=True, how='left')

    # - simple_simulate

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=joint_tours_merged,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='composition')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition', joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
Example #48
0
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the chosen index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as for non_mandatory_tours except trip types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
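
process_mandatory_tours (not shown here) expands each person's frequency
alternative into tour rows by looking up per-alternative tour counts in the
alternatives table and repeating rows accordingly. A hedged sketch of that
expansion, with a made-up alternatives table:

import pandas as pd

# illustrative alternatives table: tours implied by each frequency alt
alts = pd.DataFrame({'work': [1, 2, 1], 'school': [0, 0, 1]},
                    index=['work1', 'work2', 'work_and_school'])

freq = pd.Series({101: 'work1', 102: 'work_and_school'},
                 name='mandatory_tour_frequency')

tour_counts = alts.loc[freq.values]  # one row of tour counts per person
tour_counts.index = freq.index
counts = tour_counts.stack()                 # (person_id, tour_type) -> count
counts = counts[counts > 0].astype(int)
tour_index = counts.index.repeat(counts.values)  # one entry per tour
print(list(tour_index))  # [(101, 'work'), (102, 'work'), (102, 'school')]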
Example #49
0
def parking_location(trips, trips_merged, land_use, network_los, chunk_size,
                     trace_hh_id):
    """
    Given a set of trips, each trip needs to have a parking location if
    it is eligible for remote parking.
    """

    trace_label = 'parking_location'
    model_settings = config.read_model_settings('parking_location_choice.yaml')
    alt_destination_col_name = model_settings['ALT_DEST_COL_NAME']

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    trips_df = trips.to_frame()
    trips_merged_df = trips_merged.to_frame()
    land_use_df = land_use.to_frame()

    locals_dict = {'network_los': network_los}
    locals_dict.update(config.get_model_constants(model_settings))

    if preprocessor_settings:
        expressions.assign_columns(df=trips_merged_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    parking_locations, save_sample_df = run_parking_destination(
        model_settings,
        trips_merged_df,
        land_use_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
    )

    assign_in_place(trips_df,
                    parking_locations.to_frame(alt_destination_col_name))

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get(
            'PARKING_LOCATION_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
Example #50
0
def _schedule_tours(
        tours, persons_merged, alts,
        spec, logsum_tour_purpose,
        model_settings,
        timetable, window_id_col,
        previous_tour, tour_owner_id_col,
        tour_trace_label):
    """
    previous_tour stores the values used to add per-person previous-tour
    columns that can be referenced in the spec.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available, so if the alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour scheduled for that person.  The first time through,
    start_previous and end_previous are undefined, so protect any expression
    that uses them with a tour_num >= 2 guard in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule with unique timetable window_id_col
    persons_merged : DataFrame
        DataFrame of persons to be merged with tours; contains the attributes
        referenced by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent all possible time slots.
        the tdd_interaction_dataset function will use timetable to filter out
        unavailable alternatives
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
    model_settings : dict
    timetable : TimeTable
        timetable of timewidows for person (or subtour) with rows for tours[window_id_col]
    window_id_col : str
        column name from tours that identifies timetable owner (or None if tours index)
        - person_id for non/mandatory tours
        - parent_tour_id for subtours
        - None (tours index) for joint_tours, since every tour may have different participants
    previous_tour: Series
        series with the tdd_alt choice of the most recent tour scheduled for each tour owner
    tour_owner_id_col : str
        column name from tours that identifies 'owner' of this tour
        (person_id for non/mandatory tours, parent_tour_id for subtours,
        household_id for joint_tours)
    tour_trace_label

    Returns
    -------
    choices : Series
        tdd alt choice for each tour, indexed on the tours index
    """

    logger.info("%s schedule_tours running %d tour choices" % (tour_trace_label, len(tours)))

    # merge persons into tours
    # avoid dual suffix for redundant column names (e.g. household_id) that appear in both
    tours = pd.merge(tours, persons_merged, left_on='person_id', right_index=True,
                     suffixes=('', '_y'))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - add explicit window_id_col for timetable owner if it is index
    # if no timetable window_id_col specified, then add index as an explicit column
    # (this is not strictly necessary but its presence makes code simpler in several places)
    if window_id_col is None:
        window_id_col = tours.index.name
        tours[window_id_col] = tours.index

    # timetable can't handle multiple tours per window_id
    assert not tours[window_id_col].duplicated().any()

    # - build interaction dataset filtered to include only available tdd alts
    # dataframe columns start, end, duration, person_id, tdd
    # indexed (not unique) on tour_id
    choice_column = 'tdd'
    alt_tdd = tdd_interaction_dataset(tours, alts, timetable, choice_column, window_id_col,
                                      tour_trace_label)
    chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    # - add logsums
    if logsum_tour_purpose:
        logsums = \
            compute_logsums(alt_tdd, tours, logsum_tour_purpose, model_settings, tour_trace_label)
    else:
        logsums = 0
    alt_tdd['mode_choice_logsum'] = logsums

    # - merge in previous tour columns
    # adds start_previous and end_previous, joins on index
    tours = \
        tours.join(get_previous_tour_by_tourid(tours[tour_owner_id_col], previous_tour, alts))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - make choices
    locals_d = {
        'tt': timetable
    }
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample_simulate(
        tours,
        alt_tdd,
        spec,
        choice_column=choice_column,
        locals_d=locals_d,
        chunk_size=0,
        trace_label=tour_trace_label
    )

    # - update previous_tour and timetable parameters

    # update previous_tour (series with most recent previous tdd choices) with latest values
    previous_tour.loc[tours[tour_owner_id_col]] = choices.values

    # update timetable with chosen tdd footprints
    timetable.assign(tours[window_id_col], choices)

    return choices
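# The docstring above warns that start_previous/end_previous are undefined for
# a person's first tour. A small sketch of guarding such an expression with
# tour_num >= 2, as the docstring advises (columns and values hypothetical):
import pandas as pd

tours_sketch = pd.DataFrame({
    'tour_num': [1, 2],
    'start': [8, 11],
    'end_previous': [float('nan'), 12.0],  # NaN: no previous tour yet
})

# the guard keeps the first tour from evaluating against an undefined value
tours_sketch['starts_before_previous_end'] = \
    (tours_sketch.tour_num >= 2) & (tours_sketch.start < tours_sketch.end_previous)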
Example #51
0
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):

    """
    Compute accessibility for each zone in the land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the MTC accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question.  The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    sum mutes large differences.  The decay function for the walk accessibility measure is
    steeper than for automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
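# A compact numpy sketch of the accessibility measure the docstring describes:
# decay-weighted employment summed over destinations, then logged (the +1 keeps
# the minimum at zero). The dispersion coefficient and data are hypothetical.
import numpy as np

emp = np.array([500.0, 200.0, 50.0])        # employment by destination zone
time_od = np.array([[5.0, 15.0, 30.0],      # round-trip times, 2 origs x 3 dests
                    [20.0, 10.0, 25.0]])
dispersion = -0.05                          # hypothetical mode-specific decay

accessibility_sketch = np.log(1 + (emp * np.exp(dispersion * time_od)).sum(axis=1))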
def atwork_subtour_mode_choice(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    subtours_merged = \
        pd.merge(subtours, persons_merged.to_frame(),
                 left_on='person_id', right_index=True, how='left')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d subtours" % (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type, value_counts=True)

    # setup skim keys
    orig_col_name = 'workplace_taz'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices = run_tour_mode_choice_simulate(
        subtours_merged,
        spec, tour_purpose='atwork', model_settings=model_settings,
        skims=skims,
        constants=constants,
        nest_spec=nest_spec,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label, choices, value_counts=True)

    assign_in_place(tours, choices.to_frame('tour_mode'))
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
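# The odt/dot wrappers above provide directional, time-period-dependent skim
# lookups: the outbound leg origin->destination at the departure period, the
# return leg destination->origin at the arrival period. A toy version of that
# lookup pattern (matrices, values, and period labels are hypothetical):
import numpy as np

sov_time = {'AM': np.array([[0.0, 12.0], [14.0, 0.0]]),
            'PM': np.array([[0.0, 13.0], [15.0, 0.0]])}

orig, dest = 0, 1
odt = sov_time['AM'][orig, dest]   # outbound leg at the out_period
dot = sov_time['PM'][dest, orig]   # return leg at the in_period
round_trip_time = odt + dot        # 12.0 + 15.0 = 27.0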
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory tours (see the
    alternatives above) - these tours include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'
    model_settings_file_name = 'mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons",
                len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    estimator = estimation.manager.begin_estimation('mandatory_tour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'mandatory_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as for non_mandatory_tours except tour types are "work" and "school"
    """
    alternatives = simulate.read_model_alts(
        'mandatory_tour_frequency_alternatives.csv', set_index='alt')
    choosers['mandatory_tour_frequency'] = choices.reindex(choosers.index)

    mandatory_tours = process_mandatory_tours(
        persons=choosers, mandatory_tour_frequency_alts=alternatives)

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(
        persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
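# simple_simulate returns positional indexes into the spec's alternative
# columns; indexing model_spec.columns by those positions recovers the
# alternative names, as done above. A toy version (names are hypothetical):
import pandas as pd

spec_columns = pd.Index(['work1', 'work2', 'school1', 'work_and_school'])
raw_choices = pd.Series([0, 2, 3], index=pd.Index([10, 11, 12], name='person_id'))

named_choices = pd.Series(spec_columns[raw_choices.values],
                          index=raw_choices.index)
# person 10 -> 'work1', person 11 -> 'school1', person 12 -> 'work_and_school'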
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory tours
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these tours include escort, shopping, othmaint,
    othdiscr, eatout, and social tours in various combinations.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings = config.read_model_settings('non_mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='non_mandatory_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('non_mandatory_tour_frequency_alternatives.csv'),
        set_index=None)

    choosers = persons_merged.to_frame()

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for ptype, segment in choosers.groupby('ptype'):

        name = PTYPE_NAME[ptype]

        # pick the spec column for the segment
        spec = model_spec[[name]]

        # drop any zero-valued rows
        spec = spec[spec[name] != 0]

        logger.info("Running segment '%s' of size %d", name, len(segment))

        choices = interaction_simulate(
            segment,
            alternatives,
            spec=spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        # FIXME - force garbage collection?
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    del alternatives['tot_tours']  # del tot_tours column we added above

    # - add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    # (we expect there to be an alt with no tours - which we can use to backfill non-travelers)
    no_tours_alt = alternatives.index[alternatives.sum(axis=1) == 0][0]
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tours, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)
    """

    # - get counts of each of the alternatives (so we can extend)
    # (choices is just the index values for the chosen alts)
    """
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """
    tour_counts = alternatives.loc[choices]
    tour_counts.index = choices.index  # assign person ids to the index

    prev_tour_count = tour_counts.sum().sum()

    # - extend_tour_counts
    tour_counts = extend_tour_counts(choosers, tour_counts, alternatives,
                                     trace_hh_id,
                                     tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    extended_tour_count = tour_counts.sum().sum()

    logger.info("extend_tour_counts increased nmtf tour count by %s from %s to %s" %
                 (extended_tour_count - prev_tour_count, prev_tour_count, extended_tour_count))

    # - create the non_mandatory tours
    non_mandatory_tours = process_non_mandatory_tours(persons, tour_counts)
    assert len(non_mandatory_tours) == extended_tour_count

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
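# tour_counts above is built by looking up each person's chosen alternative
# row and re-labeling the rows with person ids. A toy version of that lookup
# (alternative rows and choices are hypothetical):
import pandas as pd

alts_sketch = pd.DataFrame({'escort': [0, 2, 0],
                            'shopping': [0, 0, 1],
                            'eatout': [0, 1, 1]})
chosen = pd.Series([1, 2], index=pd.Index([2588676, 2588677], name='person_id'))

counts = alts_sketch.loc[chosen]   # one row per person, from the chosen alt
counts.index = chosen.index        # re-label rows with person ids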
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers%s alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))

    # - setup skims

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(tour_purpose)

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    omnibus_coefficient_spec = get_coeffecients_spec(logsum_settings)
    coefficient_spec = omnibus_coefficient_spec[tour_purpose]
    coefficients = assign.evaluate_constants(coefficient_spec, constants=constants)

    locals_dict = {}
    locals_dict.update(coefficients)
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - compute logsums
    logsum_spec = get_logsum_spec(logsum_settings)
    nest_spec = config.get_logit_model_settings(logsum_settings)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)

    return logsums
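# For a plain multinomial logit, the logsum over alternatives with utilities
# V_i is log(sum_i exp(V_i)); nested specs generalize this. A numerically
# stable sketch with hypothetical mode utilities:
import numpy as np

V = np.array([-1.2, -0.7, -2.5])
vmax = V.max()
logsum = vmax + np.log(np.exp(V - vmax).sum())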
Example #56
0
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each at-work subtour
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings_file_name = 'tour_scheduling_atwork.yaml'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        # we don't need to update timetable because subtours are scheduled inside work trip windows

    choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                           left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
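# The tdd merge above attaches start/end/duration to each scheduled tour by
# joining the chosen tdd alternative ids against the tdd_alts table. A toy
# version (alt ids and times are hypothetical):
import pandas as pd

tdd_alts_sketch = pd.DataFrame({'start': [8, 9], 'end': [12, 17], 'duration': [4, 8]},
                               index=pd.Index([0, 1], name='tdd'))
chosen_tdd = pd.Series([1, 0], index=pd.Index([101, 102], name='tour_id'))

tdd_choices_sketch = pd.merge(chosen_tdd.to_frame('tdd'), tdd_alts_sketch,
                              left_on='tdd', right_index=True, how='left')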