Python eval_coefficients Examples

Programming Language: Python

Namespace/Package Name: activitysim.core.simulate

Method/Function: eval_coefficients

Examples at hotexamples.com: 27

Python eval_coefficients - 27 examples found. These are the top rated real world Python examples of activitysim.core.simulate.eval_coefficients extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """
    trace_label = 'auto_ownership_simulate'
    model_settings_file_name = 'auto_ownership.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    estimator = estimation.manager.begin_estimation('auto_ownership')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choosers = households_merged.to_frame()

    logger.info("Running %s with %d households", trace_label, len(choosers))

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership',
        estimator=estimator)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households', 'auto_ownership')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label='auto_ownership',
                         warn_if_empty=True)

Example #2

Show file

def atwork_subtour_destination_sample(tours, persons_merged, model_settings,
                                      network_los, destination_size_terms,
                                      estimator, chunk_size, trace_label):

    model_spec = simulate.read_model_spec(
        file_name=model_settings['SAMPLE_SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    # merge persons into tours
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings['SAMPLE_SIZE']
    if estimator:
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices"
            % (trace_label, ))
        sample_size = 0

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running atwork_subtour_location_sample with %d tours",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_zone_id
    # in the choosers and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    dest_column_name = destination_size_terms.index.name
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap('workplace_zone_id', dest_column_name)

    locals_d = {'skims': skims}
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(choosers,
                                 alternatives=destination_size_terms,
                                 sample_size=sample_size,
                                 alt_col_name=alt_dest_col_name,
                                 spec=model_spec,
                                 skims=skims,
                                 locals_d=locals_d,
                                 chunk_size=chunk_size,
                                 trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = choosers.person_id

    return choices

Example #3

Show file

def joint_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                          trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'

    model_settings_file_name = 'joint_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table(
        'joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label,
                joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a households joint tours each potentially different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=joint_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('joint_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    choices = vectorize_joint_tour_scheduling(joint_tours,
                                              joint_tour_participants,
                                              persons_merged,
                                              tdd_alts,
                                              timetable,
                                              spec=model_spec,
                                              model_settings=model_settings,
                                              estimator=estimator,
                                              chunk_size=chunk_size,
                                              trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):
            nth_participants = \
                joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

            estimator.log(
                "assign timetable for %s participants in %s tours with tour_num %s"
                % (len(nth_participants), len(nth_tours), tour_num))
            # - update timetables of all joint tour participants
            timetable.assign(nth_participants.person_id,
                             reindex(choices, nth_participants.tour_id))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')

Example #4

Show file

def non_mandatory_tour_frequency(persons, persons_merged, chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings_file_name = 'non_mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives = simulate.read_model_alts(
        'non_mandatory_tour_frequency_alternatives.csv', set_index=None)
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # filter based on results of CDAP
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {'person_max_window': person_max_window}

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    logger.info("Running non_mandatory_tour_frequency with %d persons",
                len(choosers))

    constants = config.get_model_constants(model_settings)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    spec_segments = model_settings.get('SPEC_SEGMENTS', {})

    # segment by person type and pick the right spec for each person type
    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings['NAME']
        ptype = segment_settings['PTYPE']

        # pick the spec column for the segment
        segment_spec = model_spec[[segment_name]]

        chooser_segment = choosers[choosers.ptype == ptype]

        logger.info("Running segment '%s' of size %d", segment_name,
                    len(chooser_segment))

        if len(chooser_segment) == 0:
            # skip empty segments
            continue

        estimator = \
            estimation.manager.begin_estimation(model_name=segment_name, bundle_name='non_mandatory_tour_frequency')

        coefficients_df = simulate.read_model_coefficients(segment_settings)
        segment_spec = simulate.eval_coefficients(segment_spec,
                                                  coefficients_df, estimator)

        if estimator:
            estimator.write_spec(model_settings, bundle_directory=True)
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name,
                                           bundle_directory=True)
            # preserving coefficients file name makes bringing back updated coefficients more straightforward
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)
            estimator.write_alternatives(alternatives, bundle_directory=True)

            # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
            #  shuold we do it here or have interaction_simulate do it?
            # chooser index must be duplicated in column or it will be omitted from interaction_dataset
            # estimation requires that chooser_id is either in index or a column of interaction_dataset
            # so it can be reformatted (melted) and indexed by chooser_id and alt_id
            assert chooser_segment.index.name == 'person_id'
            assert 'person_id' not in chooser_segment.columns
            chooser_segment['person_id'] = chooser_segment.index

            # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables?
            estimator.set_alt_id('alt_id')

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = interaction_simulate(
            chooser_segment,
            alternatives,
            spec=segment_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % segment_name,
            trace_choice_name='non_mandatory_tour_frequency',
            estimator=estimator)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'persons', 'non_mandatory_tour_frequency')
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

        # FIXME - force garbage collection?
        force_garbage_collect()

    del alternatives['tot_tours']  # del tot_tours column we added above

    # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    # is the index value of the chosen alternative in the alternatives table.
    choices = pd.concat(choices_list).sort_index()

    # add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)
    """
    We have now generated non-mandatory tour frequencies, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)

    But before we do that, we run an additional probablilistic step to extend/increase tour counts
    beyond the strict limits of the tour_frequency alternatives chosen above (which are currently limited
    to at most 2 escort tours and 1 each of shopping, othmaint, othdiscr, eatout, and social tours)

    The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate is simply the
    index value of the chosen alternative in the alternatives table.

    get counts of each of the tour type alternatives (so we can extend)
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """

    # counts of each of the tour type alternatives (so we can extend)
    modeled_tour_counts = alternatives.loc[choices]
    modeled_tour_counts.index = choices.index  # assign person ids to the index

    # - extend_tour_counts - probabalistic
    extended_tour_counts = \
        extend_tour_counts(choosers, modeled_tour_counts.copy(), alternatives,
                           trace_hh_id, tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    num_modeled_tours = modeled_tour_counts.sum().sum()
    num_extended_tours = extended_tour_counts.sum().sum()
    logger.info("extend_tour_counts increased tour count by %s from %s to %s" %
                (num_extended_tours - num_modeled_tours, num_modeled_tours,
                 num_extended_tours))
    """
    create the non_mandatory tours based on extended_tour_counts
    """
    if estimator:
        override_tour_counts = \
            estimation.manager.get_survey_values(extended_tour_counts,
                                                 table_name='persons',
                                                 column_names=['_%s' % c for c in extended_tour_counts.columns])
        override_tour_counts = \
            override_tour_counts.rename(columns={('_%s' % c): c for c in extended_tour_counts.columns})
        logger.info(
            "estimation get_survey_values override_tour_counts %s changed cells"
            % (override_tour_counts != extended_tour_counts).sum().sum())
        extended_tour_counts = override_tour_counts
    """
    create the non_mandatory tours based on extended_tour_counts
    """
    non_mandatory_tours = process_non_mandatory_tours(persons,
                                                      extended_tour_counts)
    assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()

    if estimator:

        # make sure they created the right tours
        survey_tours = estimation.manager.get_survey_table(
            'tours').sort_index()
        non_mandatory_survey_tours = survey_tours[survey_tours.tour_category ==
                                                  'non_mandatory']
        assert len(non_mandatory_survey_tours) == len(non_mandatory_tours)
        assert non_mandatory_survey_tours.index.equals(
            non_mandatory_tours.sort_index().index)

        # make sure they created tours with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_type', 'tour_category']
        survey_tours = \
            estimation.manager.get_survey_values(non_mandatory_tours,
                                                 table_name='tours',
                                                 column_names=columns)

        tours_differ = (non_mandatory_tours[columns] !=
                        survey_tours[columns]).any(axis=1)

        if tours_differ.any():
            print("tours_differ\n%s" % tours_differ)
            print("%s of %s tours differ" %
                  (tours_differ.sum(), len(tours_differ)))
            print("differing survey_tours\n%s" % survey_tours[tours_differ])
            print("differing modeled_tours\n%s" %
                  non_mandatory_tours[columns][tours_differ])

        assert (not tours_differ.any())

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(
            non_mandatory_tours,
            label="non_mandatory_tour_frequency.non_mandatory_tours",
            warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(
            persons,
            label="non_mandatory_tour_frequency.annotated_persons",
            warn_if_empty=True)

Example #5

Show file

def run_tour_mode_choice_simulate(choosers,
                                  tour_purpose,
                                  model_settings,
                                  mode_column_name,
                                  logsum_column_name,
                                  network_los,
                                  skims,
                                  constants,
                                  estimator,
                                  chunk_size,
                                  trace_label=None,
                                  trace_choice_name=None):
    """
    This is a utility to run a mode choice model for each segment (usually
    segments are tour/trip purposes).  Pass in the tours/trip that need a mode,
    the Skim object, the spec to evaluate with, and any additional expressions
    you want to use in the evaluation of variables.
    """

    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(model_settings,
                                                     tour_purpose)

    spec = simulate.eval_coefficients(spec, coefficients, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                trace_label)

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(skims)

    # coefficients can appear in expressions
    locals_dict.update(coefficients)

    assert ('in_period' not in choosers) and ('out_period' not in choosers)
    in_time = skims['in_time_col_name']
    out_time = skims['out_time_col_name']
    choosers['in_period'] = network_los.skim_time_period_label(
        choosers[in_time])
    choosers['out_period'] = network_los.skim_time_period_label(
        choosers[out_time])

    expressions.annotate_preprocessors(choosers, locals_dict, skims,
                                       model_settings, trace_label)

    trace_column_names = choosers.index.name
    assert trace_column_names == 'tour_id'
    if trace_column_names not in choosers:
        choosers[trace_column_names] = choosers.index

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(choosers)

    choices = mode_choice_simulate(choosers=choosers,
                                   spec=spec,
                                   nest_spec=nest_spec,
                                   skims=skims,
                                   locals_d=locals_dict,
                                   chunk_size=chunk_size,
                                   mode_column_name=mode_column_name,
                                   logsum_column_name=logsum_column_name,
                                   trace_label=trace_label,
                                   trace_choice_name=trace_choice_name,
                                   trace_column_names=trace_column_names,
                                   estimator=estimator)

    return choices

Example #6

Show file

def compute_logsums(primary_purpose, trips, destination_sample, tours_merged,
                    model_settings, skim_hotel, chunk_size, trace_label):
    """
    Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it twice
    for each alternative since we need out-of-direction logsum
    (i.e . origin to alt_dest, and alt_dest to half-tour destination)

    Returns
    -------
        adds od_logsum and dp_logsum columns to trips (in place)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.info("Running %s with %d samples", trace_label,
                destination_sample.shape[0])

    # chunk usage is uniform so better to combine
    chunk_tag = 'trip_destination.compute_logsums'

    # FIXME should pass this in?
    network_los = inject.get_injectable('network_los')

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    assert trips_merged.index.equals(trips.index)

    # - choosers - merge destination_sample and trips_merged
    # re/set index because pandas merge does not preserve left index if it has duplicate values!
    choosers = pd.merge(destination_sample,
                        trips_merged.reset_index(),
                        left_index=True,
                        right_on='trip_id',
                        how="left",
                        suffixes=('', '_r')).set_index('trip_id')
    assert choosers.index.equals(destination_sample.index)

    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])
    coefficients = simulate.get_segment_coefficients(logsum_settings,
                                                     primary_purpose)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                trace_label)

    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec,
                                             coefficients,
                                             estimator=None)

    locals_dict = {}
    locals_dict.update(config.get_model_constants(logsum_settings))

    # coefficients can appear in expressions
    locals_dict.update(coefficients)

    skims = skim_hotel.logsum_skims()
    if network_los.zone_system == los.THREE_ZONE:
        # TVPB constants can appear in expressions
        locals_dict.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    # - od_logsums
    od_skims = {
        'ORIGIN': model_settings['TRIP_ORIGIN'],
        'DESTINATION': model_settings['ALT_DEST_COL_NAME'],
        "odt_skims": skims['odt_skims'],
        "dot_skims": skims['dot_skims'],
        "od_skims": skims['od_skims'],
    }
    if network_los.zone_system == los.THREE_ZONE:
        od_skims.update({
            'tvpb_logsum_odt': skims['tvpb_logsum_odt'],
            'tvpb_logsum_dot': skims['tvpb_logsum_dot']
        })
    destination_sample['od_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        nest_spec,
        logsum_spec,
        od_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'od'),
        chunk_tag=chunk_tag)

    # - dp_logsums
    dp_skims = {
        'ORIGIN': model_settings['ALT_DEST_COL_NAME'],
        'DESTINATION': model_settings['PRIMARY_DEST'],
        "odt_skims": skims['dpt_skims'],
        "dot_skims": skims['pdt_skims'],
        "od_skims": skims['dp_skims'],
    }
    if network_los.zone_system == los.THREE_ZONE:
        dp_skims.update({
            'tvpb_logsum_odt': skims['tvpb_logsum_dpt'],
            'tvpb_logsum_dot': skims['tvpb_logsum_pdt']
        })

    destination_sample['dp_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        nest_spec,
        logsum_spec,
        dp_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'dp'),
        chunk_tag=chunk_tag)

    return destination_sample

Example #7

Show file

File: atwork_subtour_destination.py Project: ymaVTA/activitysim

def atwork_subtour_destination_simulate(subtours, persons_merged,
                                        destination_sample, want_logsums,
                                        model_settings, skim_dict,
                                        destination_size_terms, estimator,
                                        chunk_size, trace_label):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge destination_size_terms columns into alt sample list
    alternatives = \
        pd.merge(destination_sample, destination_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location',
        estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    return choices

Example #8

Show file

def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     network_los, skims, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info(
        f"{trace_label} compute_logsums for {choosers.shape[0]} choosers {alt_tdd.shape[0]} alts"
    )

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)
    locals_dict = {}
    locals_dict.update(constants)

    if network_los.zone_system == los.THREE_ZONE:
        # TVPB constants can appear in expressions
        locals_dict.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    locals_dict.update(skims)

    # constrained coefficients can appear in expressions
    coefficients = simulate.get_segment_coefficients(logsum_settings,
                                                     tour_purpose)
    locals_dict.update(coefficients)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # - compute logsums
    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec,
                                             coefficients,
                                             estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                trace_label)

    logsums = simulate.simple_simulate_logsums(choosers,
                                               logsum_spec,
                                               nest_spec,
                                               skims=skims,
                                               locals_d=locals_dict,
                                               chunk_size=0,
                                               trace_label=trace_label)

    return logsums

Example #9

Show file

File: telecommute_frequency.py Project: fscottfoti/activitysim

def telecommute_frequency(
        persons_merged, persons,
        chunk_size, trace_hh_id):
    """
    This model predicts the frequency of telecommute for a person (worker) who
    does not works from home. The alternatives of this model are 'No Telecommute',
    '1 day per week', '2 to 3 days per week' and '4 days per week'. This model
    reflects the choices of people who prefer a combination of working from home and
    office during a week.
    """

    trace_label = 'telecommute_frequency'
    model_settings_file_name = 'telecommute_frequency.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('telecommute_frequency')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='telecommute_frequency',
        estimator=estimator)

    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'telecommute_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['telecommute_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('telecommute_frequency', persons.telecommute_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)

Example #10

Show file

File: transit_pass_subsidy.py Project: fscottfoti/activitysim

def transit_pass_subsidy(persons_merged, persons, chunk_size, trace_hh_id):
    """
    Transit pass subsidy model.
    """

    trace_label = 'transit_pass_subsidy'
    model_settings_file_name = 'transit_pass_subsidy.yaml'

    choosers = persons_merged.to_frame()
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('transit_pass_subsidy')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='transit_pass_subsidy',
        estimator=estimator)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'transit_pass_subsidy')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['transit_pass_subsidy'] = choices.reindex(persons.index)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('transit_pass_subsidy',
                          persons.transit_pass_subsidy,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)

Example #11

Show file

def work_from_home(persons_merged, persons, chunk_size, trace_hh_id):
    """
    This model predicts whether a person (worker) works from home. The output
    from this model is TRUE (if works from home) or FALSE (works away from home).
    The workplace location choice is overridden for workers who work from home
    and set to -1.

    """

    trace_label = 'work_from_home'
    model_settings_file_name = 'work_from_home.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('work_from_home')

    constants = config.get_model_constants(model_settings)
    work_from_home_alt = model_settings['WORK_FROM_HOME_ALT']

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    # - iterative what-if if specified
    iterations = model_settings.get('WORK_FROM_HOME_ITERATIONS', 1)
    iterations_coefficient_constant = model_settings.get(
        'WORK_FROM_HOME_COEFFICIENT_CONSTANT', None)
    iterations_target_percent = model_settings.get(
        'WORK_FROM_HOME_TARGET_PERCENT', None)
    iterations_target_percent_tolerance = model_settings.get(
        'WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE', None)

    for iteration in range(iterations):

        logger.info("Running %s with %d persons iteration %d", trace_label,
                    len(choosers), iteration)

        # re-read spec to reset substitution
        model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
        model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                                estimator)

        choices = simulate.simple_simulate(choosers=choosers,
                                           spec=model_spec,
                                           nest_spec=nest_spec,
                                           locals_d=constants,
                                           chunk_size=chunk_size,
                                           trace_label=trace_label,
                                           trace_choice_name='work_from_home',
                                           estimator=estimator)

        if iterations_target_percent is not None:
            current_percent = ((choices == work_from_home_alt).sum() /
                               len(choices))
            logger.info(
                "Running %s iteration %i current percent %f target percent %f",
                trace_label, iteration, current_percent,
                iterations_target_percent)

            if current_percent <= (iterations_target_percent +
                                   iterations_target_percent_tolerance
                                   ) and current_percent >= (
                                       iterations_target_percent -
                                       iterations_target_percent_tolerance):
                logger.info(
                    "Running %s iteration %i converged with coefficient %f",
                    trace_label, iteration,
                    coefficients_df.value[iterations_coefficient_constant])
                break

            else:
                new_value = np.log(
                    iterations_target_percent /
                    np.maximum(current_percent, 0.0001)
                ) + coefficients_df.value[iterations_coefficient_constant]
                coefficients_df.value[
                    iterations_coefficient_constant] = new_value
                logger.info(
                    "Running %s iteration %i new coefficient for next iteration %f",
                    trace_label, iteration, new_value)
                iteration = iteration + 1

    choices = (choices == work_from_home_alt)

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'work_from_home')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['work_from_home'] = choices.reindex(
        persons.index).fillna(0).astype(bool)
    persons[dest_choice_column_name] = np.where(
        persons.work_from_home is True, -1, persons[dest_choice_column_name])

    pipeline.replace_table("persons", persons)

    tracing.print_summary('work_from_home',
                          persons.work_from_home,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)

Example #12

Show file

File: logsums.py Project: figo2002/activitysim

def compute_logsums(choosers,
                    tour_purpose,
                    logsum_settings, model_settings,
                    network_los,
                    chunk_size,
                    chunk_tag,
                    trace_label):
    """

    Parameters
    ----------
    choosers
    tour_purpose
    logsum_settings
    model_settings
    network_los
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.debug("Running compute_logsums with %d choosers" % choosers.shape[0])

    # compute_logsums needs to know name of dest column in interaction_sample
    orig_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # FIXME - are we ok with altering choosers (so caller doesn't have to set these)?
    assert ('in_period' not in choosers) and ('out_period' not in choosers)
    choosers['in_period'] = network_los.skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = network_los.skim_time_period_label(model_settings['OUT_PERIOD'])

    assert ('duration' not in choosers)
    choosers['duration'] = model_settings['IN_PERIOD'] - model_settings['OUT_PERIOD']

    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)

    logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)

    locals_dict = {}
    # model_constants can appear in expressions
    locals_dict.update(config.get_model_constants(logsum_settings))
    # constrained coefficients can appear in expressions
    locals_dict.update(coefficients)

    # setup skim keys
    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name, dest_key=dest_col_name,
                                           tod_key='out_period', segment_key='demographic_segment',
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name, dest_key=orig_col_name,
                                           tod_key='in_period', segment_key='demographic_segment',
                                           trace_label=trace_label, tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        locals_dict.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label)

    return logsums

Example #13

Show file

File: trip_mode_choice.py Project: fscottfoti/activitysim

def trip_mode_choice(trips, tours_merged, network_los, chunk_size,
                     trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour putpose are calculated separately because they have different
    coefficient values (stored in trip_mode_choice_coefficients.csv coefficient file.)

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips_df,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(
        trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({'ORIGIN': orig_col, 'DESTINATION': dest_col})

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col,
                                               dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col,
                                               dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col,
                                           dest_key=dest_col,
                                           tod_key='trip_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            # 'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby(
            'primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(
            trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" % (
            primary_purpose,
            len(trips_segment.index),
        ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(
            model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        expressions.annotate_preprocessors(trips_segment, locals_dict, skims,
                                           model_settings, segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients,
                                            estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[
                        dest_col] = np.nan if pd.api.types.is_numeric_dtype(
                            skim_cache[c]) else ''
                choices_df[dest_col].where(
                    choices_df[mode_column_name] != mode,
                    skim_cache[c],
                    inplace=True)

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(
            choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()

    # update trips table with choices (and potionally logssums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes',
                          trips_merged.tour_mode,
                          value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name],
                          value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(
                             trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

Example #14

Show file

File: joint_tour_composition.py Project: fscottfoti/activitysim

def joint_tour_composition(tours, households, persons, chunk_size,
                           trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).
    """
    trace_label = 'joint_tour_composition'
    model_settings_file_name = 'joint_tour_composition.yaml'

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('joint_tour_composition')

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" %
                joint_tours.shape[0])

    # - run preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    joint_tours_merged = pd.merge(joint_tours,
                                  households,
                                  left_on='household_id',
                                  right_index=True,
                                  how='left')

    # - simple_simulate
    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(joint_tours_merged)

    choices = simulate.simple_simulate(choosers=joint_tours_merged,
                                       spec=model_spec,
                                       nest_spec=nest_spec,
                                       locals_d=constants,
                                       chunk_size=chunk_size,
                                       trace_label=trace_label,
                                       trace_choice_name='composition',
                                       estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'composition')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition',
                          joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')

Example #15

Show file

File: trip_mode_choice.py Project: arunakkin/activitysim

def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour putpose are calculated separately because they have different
    coefficient values (stored in trip_mode_choice_coefficients.csv coefficient file.)

    Adds trip_mode column to trip table
    """

    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    # give trip mode choice the option to run without calling tours_merged. Useful for xborder
    # model where tour_od_choice needs trip mode choice logsums before some of the join keys
    # needed by tour_merged (e.g. home_zone_id) exist
    tours_cols = [
        col for col in model_settings['TOURS_MERGED_CHOOSER_COLUMNS']
        if col not in trips_df.columns
    ]
    if len(tours_cols) > 0:
        tours_merged = inject.get_table('tours_merged').to_frame(
            columns=tours_cols)
    else:
        tours_merged = pd.DataFrame()

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips_df,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    assert trips_merged.index.equals(trips.index)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(
        trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'
    min_per_period = network_los.skim_time_periods['period_minutes']
    periods_per_hour = 60 / min_per_period

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col,
        'MIN_PER_PERIOD': min_per_period,
        'PERIODS_PER_HOUR': periods_per_hour
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col,
                                               dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col,
                                               dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb
        tvpb_recipe = model_settings.get('TVPB_recipe', 'tour_mode_choice')
        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col,
                                           dest_key=dest_col,
                                           tod_key='trip_period',
                                           segment_key='demographic_segment',
                                           recipe=tvpb_recipe,
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
        })

        # This if-clause gives the user the option of NOT inheriting constants
        # from the tvpb settings. previously, these constants were inherited
        # automatically, which had the undesirable effect of overwriting any
        # trip mode choice model constants/coefficients that shared the same
        # name. The default behavior is still the same (True), but the user
        # can now avoid any chance of squashing these local variables by
        # adding `use_TVPB_constants: False` to the trip_mode_choice.yaml file.
        # the tvpb will still use the constants as defined in the recipe
        # specified above in `tvpb.wrap_logsum()` but they will not be used
        # in the trip mode choice expressions.
        if model_settings.get('use_TVPB_constants', True):
            constants.update(
                network_los.setting(
                    'TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    # don't create estimation data bundle if trip mode choice is being called
    # from another model step (e.g. tour mode choice logsum creation)
    if pipeline._PIPELINE.rng().step_name != 'trip_mode_choice':
        estimator = None
    else:
        estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby(
            'primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(
            trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" % (
            primary_purpose,
            len(trips_segment.index),
        ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(
            model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)

        constants_keys = constants.keys()
        if any([coeff in constants_keys for coeff in coefficients.keys()]):
            logger.warning(
                "coefficients are obscuring constants in locals_dict")
        locals_dict.update(coefficients)

        # have to initialize chunker for preprocessing in order to access
        # tvpb logsum terms in preprocessor expressions.
        with chunk.chunk_log(tracing.extend_trace_label(
                trace_label, 'preprocessing'),
                             base=True):
            expressions.annotate_preprocessors(trips_segment, locals_dict,
                                               skims, model_settings,
                                               segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients,
                                            estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[
                        dest_col] = np.nan if pd.api.types.is_numeric_dtype(
                            skim_cache[c]) else ''
                choices_df[dest_col].where(
                    choices_df[mode_column_name] != mode,
                    skim_cache[c],
                    inplace=True)

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(
            choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes',
                          trips_merged.tour_mode,
                          value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name],
                          value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(
                             trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

Example #16

Show file

File: mandatory_tour_frequency.py Project: ymaVTA/activitysim

def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'
    model_settings_file_name = 'mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons",
                len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    estimator = estimation.manager.begin_estimation('mandatory_tour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'mandatory_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as got non_mandatory_tours except trip types are "work" and "school"
    """
    alternatives = simulate.read_model_alts(
        'mandatory_tour_frequency_alternatives.csv', set_index='alt')
    choosers['mandatory_tour_frequency'] = choices.reindex(choosers.index)

    mandatory_tours = process_mandatory_tours(
        persons=choosers, mandatory_tour_frequency_alts=alternatives)

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(
        persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)

Example #17

Show file

def free_parking(persons_merged, persons, households, skim_dict, skim_stack,
                 chunk_size, trace_hh_id, locutor):
    """

    """

    trace_label = 'free_parking'
    model_settings_file_name = 'free_parking.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_taz > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('free_parking')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work',
        estimator=estimator)

    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'free_parking_at_work')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['free_parking_at_work'] = choices.reindex(
        persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking',
                          persons.free_parking_at_work,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)

Example #18

Show file

File: non_mandatory_scheduling.py Project: jiaodaxiaozi/activitysim

def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings_file_name = 'non_mandatory_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours",
                len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged =\
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=non_mandatory_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation(
        'non_mandatory_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    # - non_mandatory tour scheduling is not segmented by tour type
    spec_info = {'spec': model_spec, 'estimator': estimator}

    choices = vectorize_tour_scheduling(non_mandatory_tours,
                                        persons_merged,
                                        tdd_alts,
                                        timetable,
                                        tour_segments=spec_info,
                                        tour_segment_col=None,
                                        model_settings=model_settings,
                                        chunk_size=chunk_size,
                                        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in non_mandatory_tours.groupby('tour_num',
                                                               sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)

Example #19

Show file

def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at work subtours tours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings_file_name = 'tour_scheduling_atwork.yaml'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        # we don't need to update timetable because subtours are scheduled inside work trip windows

    choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts, left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')

Example #20

Show file

File: joint_tour_frequency.py Project: fscottfoti/activitysim

def joint_tour_frequency(households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making fully joint trips (see the
    alternatives above).
    """
    trace_label = 'joint_tour_frequency'
    model_settings_file_name = 'joint_tour_frequency.yaml'

    estimator = estimation.manager.begin_estimation('joint_tour_frequency')

    model_settings = config.read_model_settings(model_settings_file_name)

    alternatives = simulate.read_model_alts(
        'joint_tour_frequency_alternatives.csv', set_index='alt')

    # - only interested in households with more than one cdap travel_active person and
    # - at least one non-preschooler
    households = households.to_frame()
    multi_person_households = households[
        households.participates_in_jtf_model].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info(
        "Running joint_tour_frequency with %d multi-person households" %
        multi_person_households.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=multi_person_households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(multi_person_households)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households',
                                              'joint_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be point person for the purpose of generating an index and setting origin
    temp_point_persons = persons.loc[persons.PNUM == 1]
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_zone_id']]

    joint_tours = \
        process_joint_tours(choices, alternatives, temp_point_persons)

    tours = pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
    households['joint_tour_frequency'] = choices.reindex(
        households.index).fillna(no_tours_alt).astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency',
                          households.joint_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households, label="joint_tour_frequency.households")

        tracing.trace_df(joint_tours,
                         label="joint_tour_frequency.joint_tours",
                         slicer='household_id')

    if estimator:
        survey_tours = estimation.manager.get_survey_table('tours')
        survey_tours = survey_tours[survey_tours.tour_category == 'joint']

        print(f"len(survey_tours) {len(survey_tours)}")
        print(f"len(joint_tours) {len(joint_tours)}")

        different = False
        survey_tours_not_in_tours = survey_tours[~survey_tours.index.
                                                 isin(joint_tours.index)]
        if len(survey_tours_not_in_tours) > 0:
            print(f"survey_tours_not_in_tours\n{survey_tours_not_in_tours}")
            different = True
        tours_not_in_survey_tours = joint_tours[~joint_tours.index.
                                                isin(survey_tours.index)]
        if len(survey_tours_not_in_tours) > 0:
            print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}")
            different = True
        assert not different

Example #21

Show file

def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers%s alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))

    # - setup skims

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(
        tour_purpose)

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='in_period')
    odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='in_period')
    dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # - compute logsums

    coefficients = simulate.get_segment_coefficients(logsum_settings,
                                                     tour_purpose)
    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec,
                                             coefficients,
                                             estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients)

    # constrained coefficients can appear in expressions
    locals_dict.update(coefficients)

    logsums = simulate.simple_simulate_logsums(choosers,
                                               logsum_spec,
                                               nest_spec,
                                               skims=skims,
                                               locals_d=locals_dict,
                                               chunk_size=0,
                                               trace_label=trace_label)

    return logsums

Example #22

Show file

File: joint_tour_participation.py Project: fscottfoti/activitysim

def joint_tour_participation(
        tours, persons_merged,
        chunk_size,
        trace_hh_id):
    """
    Predicts for each eligible person to participate or not participate in each joint tour.
    """
    trace_label = 'joint_tour_participation'
    model_settings_file_name = 'joint_tour_participation.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate

    estimator = estimation.manager.begin_estimation('joint_tour_participation')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(candidates)

    # add tour-based chunk_id so we can chunk all trips in tour together
    assert 'chunk_id' not in candidates.columns
    unique_household_ids = candidates.household_id.unique()
    household_chunk_ids = pd.Series(range(len(unique_household_ids)), index=unique_household_ids)
    candidates['chunk_id'] = reindex(household_chunk_ids, candidates.household_id)

    choices = simulate.simple_simulate_by_chunk_id(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser,
        estimator=estimator)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec"
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    if estimator:
        estimator.write_choices(choices)

        # we override the 'participate' boolean series, instead of raw alternative index in 'choices' series
        # its value depends on whether the candidate's 'participant_id' is in the joint_tour_participant index
        survey_participants_df = estimator.get_survey_table('joint_tour_participants')
        participate = pd.Series(choices.index.isin(survey_participants_df.index.values), index=choices.index)

        # but estimation software wants to know the choices value (alternative index)
        choices = participate.replace({True: PARTICIPATE_CHOICE, False: 1-PARTICIPATE_CHOICE})
        # estimator.write_override_choices(participate)  # write choices as boolean participate
        estimator.write_override_choices(choices)  # write choices as int alt indexes

        estimator.end_estimation()

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")

        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")

Example #23

Show file

def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtour tours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'
    model_settings_file_name = 'atwork_subtour_frequency.yaml'

    tours = tours.to_frame()
    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    alternatives = simulate.read_model_alts(
        'atwork_subtour_frequency_alternatives.csv', set_index='alt')

    # merge persons into work_tours
    persons_merged = persons_merged.to_frame()
    work_tours = pd.merge(work_tours,
                          persons_merged,
                          left_on='person_id',
                          right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours",
                len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        assign_columns(df=work_tours,
                       model_settings=preprocessor_settings,
                       trace_label=trace_label)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(work_tours)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours',
                                              'atwork_subtour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    tours = pipeline.extend_table("tours", subtours)

    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    tracing.print_summary('atwork_subtour_frequency',
                          tours.atwork_subtour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_frequency.tours')

Example #24

Show file

File: mandatory_scheduling.py Project: jiaodaxiaozi/activitysim

def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings_file_name = 'mandatory_tour_scheduling.yaml'
    estimators = {}

    model_settings = config.read_model_settings(model_settings_file_name)
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [
        c for c in model_columns if c not in logsum_columns
    ]
    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different locsum coefficents for work, school, univ primary_purposes
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # load specs
    spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})
    specs = {}
    estimators = {}
    for spec_segment_name, spec_settings in spec_segment_settings.items():

        # estimator for this tour_segment
        estimator = estimation.manager.begin_estimation(
            model_name='mandatory_tour_scheduling_%s' % spec_segment_name,
            bundle_name='mandatory_tour_scheduling')

        spec_file_name = spec_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(
            spec_segment_settings[spec_segment_name])
        specs[spec_segment_name] = simulate.eval_coefficients(
            model_spec, coefficients_df, estimator)

        if estimator:
            estimators[spec_segment_name] = estimator  # add to local list
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name)
            estimator.write_spec(spec_settings)
            estimator.write_coefficients(coefficients_df)

    # - spec dict segmented by primary_purpose
    tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
    tour_segments = {}
    for tour_segment_name, spec_segment_name in tour_segment_settings.items():
        tour_segments[tour_segment_name] = {}
        tour_segments[tour_segment_name][
            'spec_segment_name'] = spec_segment_name
        tour_segments[tour_segment_name]['spec'] = specs[spec_segment_name]
        tour_segments[tour_segment_name]['estimator'] = estimators.get(
            spec_segment_name)

    timetable = inject.get_injectable("timetable")

    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    choices = vts.vectorize_tour_scheduling(mandatory_tours,
                                            persons_merged,
                                            tdd_alts,
                                            timetable,
                                            tour_segments=tour_segments,
                                            tour_segment_col=tour_segment_col,
                                            model_settings=model_settings,
                                            chunk_size=chunk_size,
                                            trace_label=trace_label)

    if estimators:
        # overrride choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():
            model_choices = choices[(
                mandatory_tours.tour_type == spec_segment_name)]

            # FIXME vectorize_tour_scheduling calls used to write_choices but perhaps shouldn't
            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(
                model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    timetable.replace_table()

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)

Example #25

Show file

File: stop_frequency.py Project: fscottfoti/activitysim

def stop_frequency(tours, tours_merged, stop_frequency_alts, network_los,
                   chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, shoose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to trips, with fields

    creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings_file_name = 'stop_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()
    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        assert 'origin' in tours_merged
        assert 'destination' in tours_merged
        od_skim_stack_wrapper = network_los.get_default_skim_dict().wrap(
            'origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            'network_los': network_los
        }
        locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose,
                          value_counts=True)

    spec_segments = model_settings.get('SPEC_SEGMENTS')
    assert spec_segments is not None, f"SPEC_SEGMENTS setting not found in model settings: {model_settings_file_name}"
    segment_col = model_settings.get('SEGMENT_COL')
    assert segment_col is not None, f"SEGMENT_COL setting not found in model settings: {model_settings_file_name}"

    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings[segment_col]
        segment_value = segment_settings[segment_col]

        chooser_segment = tours_merged[tours_merged[segment_col] ==
                                       segment_value]

        if len(chooser_segment) == 0:
            logging.info(
                f"{trace_label} skipping empty segment {segment_name}")
            continue

        logging.info(
            f"{trace_label} running segment {segment_name} with {chooser_segment.shape[0]} chooser rows"
        )

        estimator = estimation.manager.begin_estimation(
            model_name=segment_name, bundle_name='stop_frequency')

        segment_spec = simulate.read_model_spec(
            file_name=segment_settings['SPEC'])
        assert segment_spec is not None, "spec for segment_type %s not found" % segment_name

        coefficients_file_name = segment_settings['COEFFICIENTS']
        coefficients_df = simulate.read_model_coefficients(
            file_name=coefficients_file_name)
        segment_spec = simulate.eval_coefficients(segment_spec,
                                                  coefficients_df, estimator)

        if estimator:
            estimator.write_spec(segment_settings, bundle_directory=False)
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name,
                                           bundle_directory=True)
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = simulate.simple_simulate(
            choosers=chooser_segment,
            spec=segment_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_name),
            trace_choice_name='stops',
            estimator=estimator)

        # convert indexes to alternative names
        choices = pd.Series(segment_spec.columns[choices.values],
                            index=choices.index)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'tours', 'stop_frequency')  # override choices
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    # FIXME should have added this when tours created?
    assert 'primary_purpose' not in tours
    if 'primary_purpose' not in tours.columns:
        # if not already there, then it will have been added by annotate tours preprocessor
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if estimator:
        # make sure they created trips with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_id', 'outbound']

        survey_trips = estimation.manager.get_survey_table(table_name='trips')
        different = False
        survey_trips_not_in_trips = survey_trips[~survey_trips.index.
                                                 isin(trips.index)]
        if len(survey_trips_not_in_trips) > 0:
            print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}")
            different = True
        trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index
                                                            )]
        if len(survey_trips_not_in_trips) > 0:
            print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}")
            different = True
        assert not different

        survey_trips = \
            estimation.manager.get_survey_values(trips,
                                                 table_name='trips',
                                                 column_names=columns)

        trips_differ = (trips[columns] != survey_trips[columns]).any(axis=1)

        if trips_differ.any():
            print("trips_differ\n%s" % trips_differ)
            print("%s of %s tours differ" %
                  (trips_differ.sum(), len(trips_differ)))
            print("differing survey_trips\n%s" % survey_trips[trips_differ])
            print("differing modeled_trips\n%s" % trips[columns][trips_differ])

        assert (not trips_differ.any())

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(annotations,
                         label="stop_frequency.annotations",
                         columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)

Example #26

Show file

def run_tour_scheduling(model_name, chooser_tours, persons_merged, tdd_alts,
                        tour_segment_col, chunk_size, trace_hh_id):

    trace_label = model_name
    model_settings_file_name = f'{model_name}.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    if 'LOGSUM_SETTINGS' in model_settings:
        logsum_settings = config.read_model_settings(
            model_settings['LOGSUM_SETTINGS'])
        logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    else:
        logsum_columns = []

    # - filter chooser columns for both logsums and simulate
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [
        c for c in model_columns if c not in logsum_columns
    ]

    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    timetable = inject.get_injectable("timetable")

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_d = {'tt': timetable}
        locals_d.update(config.get_model_constants(model_settings))

        expressions.assign_columns(df=chooser_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    estimators = {}
    if 'TOUR_SPEC_SEGMENTS' in model_settings:
        # load segmented specs
        spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})
        specs = {}
        for spec_segment_name, spec_settings in spec_segment_settings.items():

            bundle_name = f'{model_name}_{spec_segment_name}'

            # estimator for this tour_segment
            estimator = estimation.manager.begin_estimation(
                model_name=bundle_name, bundle_name=bundle_name)

            spec_file_name = spec_settings['SPEC']
            model_spec = simulate.read_model_spec(file_name=spec_file_name)
            coefficients_df = simulate.read_model_coefficients(spec_settings)
            specs[spec_segment_name] = simulate.eval_coefficients(
                model_spec, coefficients_df, estimator)

            if estimator:
                estimators[spec_segment_name] = estimator  # add to local list
                estimator.write_model_settings(model_settings,
                                               model_settings_file_name)
                estimator.write_spec(spec_settings)
                estimator.write_coefficients(coefficients_df, spec_settings)

        # - spec dict segmented by primary_purpose
        tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
        tour_segments = {}
        for tour_segment_name, spec_segment_name in tour_segment_settings.items(
        ):
            tour_segments[tour_segment_name] = {}
            tour_segments[tour_segment_name][
                'spec_segment_name'] = spec_segment_name
            tour_segments[tour_segment_name]['spec'] = specs[spec_segment_name]
            tour_segments[tour_segment_name]['estimator'] = estimators.get(
                spec_segment_name)

        # default tour_segment_col to 'tour_type' if segmented spec and tour_segment_col not specified
        if tour_segment_col is None and tour_segments:
            tour_segment_col = 'tour_type'

    else:
        # unsegmented spec
        assert 'SPEC_SEGMENTS' not in model_settings
        assert 'TOUR_SPEC_SEGMENTS' not in model_settings
        assert tour_segment_col is None

        estimator = estimation.manager.begin_estimation(model_name)

        spec_file_name = model_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(model_settings)
        model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                                estimator)

        if estimator:
            estimators[None] = estimator  # add to local list
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name)
            estimator.write_spec(model_settings)
            estimator.write_coefficients(coefficients_df, model_settings)

        # - non_mandatory tour scheduling is not segmented by tour type
        tour_segments = {'spec': model_spec, 'estimator': estimator}

    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    logger.info(f"Running {model_name} with %d tours", len(chooser_tours))
    choices = vts.vectorize_tour_scheduling(chooser_tours,
                                            persons_merged,
                                            tdd_alts,
                                            timetable,
                                            tour_segments=tour_segments,
                                            tour_segment_col=tour_segment_col,
                                            model_settings=model_settings,
                                            chunk_size=chunk_size,
                                            trace_label=trace_label)

    if estimators:
        # overrride choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():
            if spec_segment_name:
                model_choices = choices[(
                    chooser_tours.tour_type == spec_segment_name)]
            else:
                model_choices = choices

            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(
                model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in chooser_tours.groupby('tour_num',
                                                         sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    return choices

Example #27

Show file

File: logsums.py Project: ymaVTA/activitysim

def compute_logsums(choosers,
                    tour_purpose,
                    logsum_settings, model_settings,
                    skim_dict, skim_stack,
                    chunk_size, trace_label):
    """

    Parameters
    ----------
    choosers
    tour_purpose
    logsum_settings
    model_settings
    skim_dict
    skim_stack
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')

    # compute_logsums needs to know name of dest column in interaction_sample
    orig_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # FIXME - are we ok with altering choosers (so caller doesn't have to set these)?
    assert ('in_period' not in choosers) and ('out_period' not in choosers)
    choosers['in_period'] = expressions.skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = expressions.skim_time_period_label(model_settings['OUT_PERIOD'])

    assert ('duration' not in choosers)
    choosers['duration'] = model_settings['IN_PERIOD'] - model_settings['OUT_PERIOD']

    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)
    logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients)

    constants = config.get_model_constants(logsum_settings)

    logger.debug("Running compute_logsums with %d choosers" % choosers.shape[0])

    # setup skim keys
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='in_period')
    dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name
    }

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(skims)

    # constrained coefficients can appear in expressions
    locals_dict.update(coefficients)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label,
        alt_col_name=dest_col_name)

    return logsums