Example #1
def annotate_tables(model_settings, trace_label):

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning("annotate_tables setting is empty - nothing to do!")

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']
        df = inject.get_table(tablename).to_frame()

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:
            logger.info("renaming %s columns %s" % (tablename, column_map,))
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info("annotated %s SPEC %s" % (tablename, annotate['SPEC'],))
            expressions.assign_columns(
                df=df,
                model_settings=annotate,
                trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, df)
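# A hypothetical settings structure that annotate_tables() above would consume, limited to
# the keys the function actually reads (table, column, and spec names are illustrative):
example_annotate_tables_settings = {
    'annotate_tables': [
        {
            'tablename': 'land_use',
            'column_map': {'ZONE': 'zone_id'},
            'annotate': {'SPEC': 'annotate_land_use'},
        },
    ],
}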
Example #2
def trip_purpose(
        trips,
        chunk_size,
        trace_hh_id):

    """
    trip purpose model step - calls run_trip_purpose to run the actual model

    adds purpose column to trips
    """
    trace_label = "trip_purpose"

    trips_df = trips.to_frame()

    choices = run_trip_purpose(
        trips_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label
    )

    trips_df['purpose'] = choices

    # we should have assigned a purpose to all trips
    assert not trips_df.purpose.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example #3
def free_parking(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """

    """

    trace_label = 'free_parking'
    model_settings = config.read_model_settings('free_parking.yaml')

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_taz > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name='free_parking.csv')
    nest_spec = config.get_logit_model_settings(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work')

    persons = persons.to_frame()

    # reindex since the model was only run for persons with a workplace (workplace_taz > -1)
    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)
    persons['free_parking_at_work'] = choices.reindex(persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking', persons.free_parking_at_work, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
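# A hypothetical free_parking model settings dict, limited to the keys free_parking()
# above actually reads; the preprocessor spec name and alternative number are illustrative
# (any constants would come from config.get_model_constants on the same settings):
example_free_parking_settings = {
    'preprocessor': {'SPEC': 'free_parking_annotate_persons_preprocessor'},
    'FREE_PARKING_ALT': 1,  # the alternative that means "free parking available at work"
}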
def atwork_subtour_destination(
        tours,
        persons_merged,
        skim_dict,
        skim_stack,
        land_use, size_terms,
        chunk_size, trace_hh_id):

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    destination_size_terms = tour_destination_size_terms(land_use, size_terms, 'atwork')

    destination_sample = atwork_subtour_destination_sample(
        subtours,
        persons_merged,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id)

    destination_sample = atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id)

    choices = atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id)

    subtours['destination'] = choices

    assign_in_place(tours, subtours[['destination']])

    pipeline.replace_table("tours", tours)

    tracing.print_summary('subtour destination', subtours.destination, describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):

    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings('non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    choices = tour_destination.run_tour_destination(
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size, trace_hh_id, trace_label)

    non_mandatory_tours['destination'] = choices

    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
Example #6
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips.)


    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id,
        trace_label)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:
        trips_df = cleanup_failed_trips(trips_df)
    elif trips_df.failed.any():
        logger.warning("%s keeping %s sidelined failed trips" %
                       (trace_label, trips_df.failed.sum()))

    pipeline.replace_table("trips", trips_df)

    print("trips_df\n", trips_df.shape)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example #7
    def replace_table(self):
        """
        Save or replace the windows_df DataFrame in the pipeline under the saved table name
        (specified when the object was instantiated).

        This is a convenience method for the case where the caller instantiates the object in
        one context (e.g. dependency injection) where it knows the pipeline table name, but
        wants to checkpoint the table in another context where it does not know that name.
        """

        assert self.windows_table_name is not None

        # get windows_df from bottleneck function in case updates to self.person_window
        # do not write through to pandas dataframe
        pipeline.replace_table(self.windows_table_name, self.get_windows_df())
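# Note: this convenience appears in the joint_tour_scheduling step further below, where the
# timetable returned by vectorize_joint_tour_scheduling is checkpointed with a bare
# timetable.replace_table() call, without the caller needing to know the pipeline table name.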
Example #8
def initialize_landuse():

    trace_label = 'initialize_landuse'

    model_settings = config.read_model_settings('initialize_landuse.yaml', mandatory=True)

    annotate_tables(model_settings, trace_label)

    # create accessibility
    land_use = pipeline.get_table('land_use')

    accessibility_df = pd.DataFrame(index=land_use.index)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
def add_null_results(trace_label, mandatory_tour_frequency_settings):
    logger.info("Skipping %s: add_null_results", trace_label)

    persons = inject.get_table('persons').to_frame()
    persons['mandatory_tour_frequency'] = ''

    tours = pd.DataFrame()
    tours['tour_category'] = None
    tours['tour_type'] = None
    tours['person_id'] = None
    tours.index.name = 'tour_id'
    pipeline.replace_table("tours", tours)

    expressions.assign_columns(
        df=persons,
        model_settings=mandatory_tour_frequency_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)
Example #10
def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label, len(households_merged))

    model_spec = simulate.read_model_spec(file_name='auto_ownership.csv')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=households_merged.to_frame(),
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership')

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label='auto_ownership',
                         warn_if_empty=True)
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
Example #12
def joint_tour_frequency(households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making fully joint trips (see the
    alternatives above).
    """
    trace_label = 'joint_tour_frequency'
    model_settings_file_name = 'joint_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    alternatives = simulate.read_model_alts(
        'joint_tour_frequency_alternatives.csv', set_index='alt')

    # - only interested in households with more than one cdap travel_active person and
    # - at least one non-preschooler
    households = households.to_frame()
    multi_person_households = households[
        households.participates_in_jtf_model].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info(
        "Running joint_tour_frequency with %d multi-person households" %
        multi_person_households.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=multi_person_households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    estimator = estimation.manager.begin_estimation('joint_tour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(multi_person_households)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households',
                                              'joint_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be the point person for the purpose of generating an index and setting origin
    temp_point_persons = persons.loc[persons.PNUM == 1]
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_taz']]

    joint_tours = \
        process_joint_tours(choices, alternatives, temp_point_persons)

    tours = pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    no_tours_alt = alternatives[alternatives.sum(axis=1) == 0].index[0]
    households['joint_tour_frequency'] = choices.reindex(
        households.index).fillna(no_tours_alt).astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency',
                          households.joint_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households, label="joint_tour_frequency.households")

        tracing.trace_df(joint_tours,
                         label="joint_tour_frequency.joint_tours",
                         slicer='household_id')
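# Minimal, self-contained sketch of the reindex/fillna backfill pattern used above,
# with toy data and illustrative alternative names:
import pandas as pd

modeled_choices = pd.Series(['1_Shop', '1_Main'], index=pd.Index([10, 12], name='household_id'))
all_households = pd.Index([10, 11, 12, 13], name='household_id')
# households that were not modeled get the "no joint tours" alternative
backfilled = modeled_choices.reindex(all_households).fillna('0_tours').astype(str)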
Example #13
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):

    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    # #bug
    #
    # land_use_df = land_use_df[land_use_df.index % 2 == 1]
    # accessibility_df = accessibility_df[accessibility_df.index.isin(land_use_df.index)].head(5)
    #
    # print "land_use_df", land_use_df.index
    # print "accessibility_df", accessibility_df.index
    # #bug

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
Example #14
def balance_trips(zones, trace_od):
    """Improve the match between destination zone trip totals
    (given by the DEST_TARGETS in the balance_trips config file)
    and the trip counts calculated during the destination choice step.

    The config file should contain the following parameters:

    dest_zone_trip_targets:
      total: <aggregate destination zone trip counts>

      OR

      <segment_1>: totals for segment 1 (optional)
      <segment_2>: totals for segment 2 (optional)
      <segment_3>: totals for segment 3 (optional)

    The following parameters are also optional:

    max_iterations: maximum number of iterations to pass to the balancer
    balance_closure: float precision at which to stop balancing totals
    input_table: path to a CSV to use instead of the trips table.

    The config file can also have an orig_zone_trip_targets to manually
    specify origin zone totals instead of using the logsums calculated by
    the destination choice step.

    Parameters
    ----------
    zones : DataFrameWrapper
        zone attributes
    trace_od : list or dict


    Returns
    -------
    Nothing. Balances trips table and writes trace tables
    """

    logger.info('running trip balancing step ...')

    model_settings = config.read_model_settings(YAML_FILENAME)

    trips_df = get_trips_df(model_settings)
    trace_rows = trace.trace_filter(trips_df, trace_od)
    tracing.write_csv(trips_df[trace_rows],
                      file_name='trips_unbalanced',
                      transpose=False)

    trips_df = trips_df.melt(
                id_vars=['orig', 'dest'],
                var_name='segment',
                value_name='trips')

    dest_targets = model_settings.get(DEST_TARGETS)
    orig_targets = model_settings.get(ORIG_TARGETS)
    max_iterations = model_settings.get('max_iterations', 50)
    closure = model_settings.get('balance_closure', 0.001)

    aggregates, dimensions = calculate_aggregates(trips_df,
                                                  zones.to_frame(),
                                                  dest_targets,
                                                  orig_targets)

    balancer = Balancer(trips_df.reset_index(),
                        aggregates,
                        dimensions,
                        weight_col='trips',
                        max_iteration=max_iterations,
                        closure=closure)
    balanced_df = balancer.balance()

    balanced_trips = balanced_df.set_index(['orig', 'dest', 'segment'])['trips'].unstack()
    tracing.write_csv(balanced_trips.reset_index()[trace_rows],
                      file_name='trips_balanced',
                      transpose=False)
    pipeline.replace_table('trips', balanced_trips)

    logger.info('finished balancing trips.')
def joint_tour_composition(
        tours, households, persons,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).
    """
    trace_label = 'joint_tour_composition'

    model_settings = config.read_model_settings('joint_tour_composition.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_composition.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(trace_label, tours)
        return

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" % joint_tours.shape[0])

    # - run preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(
            df=households,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    joint_tours_merged = pd.merge(joint_tours, households,
                                  left_on='household_id', right_index=True, how='left')

    # - simple_simulate

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=joint_tours_merged,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='composition')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition', joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings = config.read_model_settings('joint_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_joint.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts,
        spec=model_spec,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
def trip_purpose_and_destination(
        trips,
        tours_merged,
        chunk_size,
        trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    # for consistency, read sample_table_name setting from trip_destination settings file
    trip_destination_model_settings = config.read_model_settings('trip_destination.yaml')
    sample_table_name = trip_destination_model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if 'failed' not in trips_df.columns:
            # trip_destination model cleaned up any failed trips
            logger.info("%s - no failed column from prior model run." % trace_label)
            return

        elif not trips_df.failed.any():
            # 'failed' column but no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            trips_df.drop(columns='failed', inplace=True)
            pipeline.replace_table("trips", trips_df)
            return

        else:
            logger.info("trip_destination has already been run. Rerunning failed trips")
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info("Rerunning %s failed trips and leg-mates" % trips_df.shape[0])

            # drop any previously saved samples of failed trips
            if want_sample_table and pipeline.is_table(sample_table_name):
                logger.info("Dropping any previously saved samples of failed trips")
                save_sample_df = pipeline.get_table(sample_table_name)
                save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
                pipeline.replace_table(sample_table_name, save_sample_df)
                del save_sample_df

    processed_trips = []
    save_samples = []
    i = 0
    TRIP_RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in TRIP_RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df, save_sample_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        # if testing, make sure at least one trip fails
        if config.setting('testing_fail_trip_destination', False) \
                and (i == 1) and not trips_df.failed.any():
            fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
            trips_df.failed = (trips_df.origin == fail_o) & \
                              (trips_df.trip_num < trips_df.trip_count)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_samples.append(save_sample_df)
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to processed_trips and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_sample_df.drop(trips_df[trips_df.failed].index, level='trip_id', inplace=True)
                save_samples.append(save_sample_df)
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to processed_trips
        processed_trips.append(trips_df[~trips_df.failed][TRIP_RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

        # add samples for the processed trips to save_samples
        if save_sample_df is not None:
            # drop failed trip samples
            save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
            save_samples.append(save_sample_df)

    # - assign result columns to trips
    processed_trips = pd.concat(processed_trips)

    if len(save_samples) > 0:
        save_sample_df = pd.concat(save_samples)
        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))
        pipeline.extend_table(sample_table_name, save_sample_df)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, processed_trips.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, processed_trips)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    # check to make sure we wrote the sample file if requested
    if want_sample_table and len(trips_df) > 0:
        assert pipeline.is_table(sample_table_name)
        # since we have saved samples for all successful trips,
        # once we discard failed trips we should have samples for all remaining trips
        save_sample_df = pipeline.get_table(sample_table_name)
        # expect samples only for intermediate trip destinations
        assert \
            len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])
        del save_sample_df

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Example #18
def trip_mode_choice(
        trips,
        tours_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in trip_mode_choice_coeffs.csv coefficient file.)

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col, right_key=dest_col,
                                             skim_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    constants = config.get_model_constants(model_settings)
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)
        choices = simulate.simple_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        # map choice indices to alternative (mode) names
        alts = model_spec.columns
        choices = choices.map(dict(enumerate(alts)))

        # tracing.print_summary('trip_mode_choice %s choices' % primary_purpose,
        #                       choices, value_counts=True)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            trips_segment['trip_mode'] = choices
            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    trips_df = trips.to_frame()
    trips_df['trip_mode'] = choices

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          choices, value_counts=True)

    assert not trips_df.trip_mode.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings = config.read_model_settings('non_mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='non_mandatory_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('non_mandatory_tour_frequency_alternatives.csv'),
        set_index=None)

    choosers = persons_merged.to_frame()

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for ptype, segment in choosers.groupby('ptype'):

        name = PTYPE_NAME[ptype]

        # pick the spec column for the segment
        spec = model_spec[[name]]

        # drop any zero-valued rows
        spec = spec[spec[name] != 0]

        logger.info("Running segment '%s' of size %d", name, len(segment))

        choices = interaction_simulate(
            segment,
            alternatives,
            spec=spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        # FIXME - force garbage collection?
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    del alternatives['tot_tours']  # del tot_tours column we added above

    # - add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    # (we expect there to be an alt with no tours - which we can use to backfill non-travelers)
    no_tours_alt = alternatives[alternatives.sum(axis=1) == 0].index[0]
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tours, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)
    """

    # - get counts of each of the alternatives (so we can extend)
    # (choices is just the index values for the chosen alts)
    """
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """
    tour_counts = alternatives.loc[choices]
    tour_counts.index = choices.index  # assign person ids to the index

    prev_tour_count = tour_counts.sum().sum()

    # - extend_tour_counts
    tour_counts = extend_tour_counts(choosers, tour_counts, alternatives,
                                     trace_hh_id,
                                     tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    extended_tour_count = tour_counts.sum().sum()

    logger.info("extend_tour_counts increased nmtf tour count by %s from %s to %s" %
                 (extended_tour_count - prev_tour_count, prev_tour_count, extended_tour_count))

    # - create the non_mandatory tours
    non_mandatory_tours = process_non_mandatory_tours(persons, tour_counts)
    assert len(non_mandatory_tours) == extended_tour_count

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
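# Self-contained sketch of turning frequency choices into per-person tour counts, mirroring
# the alternatives.loc[choices] lookup above (toy data, illustrative alternative columns):
import pandas as pd

alts = pd.DataFrame({'escort': [0, 2], 'shopping': [0, 1]}, index=[0, 1])
choices = pd.Series([1, 0, 1], index=pd.Index([100, 101, 102], name='person_id'))
tour_counts = alts.loc[choices]
tour_counts.index = choices.index  # assign person ids to the index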
def atwork_subtour_mode_choice(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    subtours_merged = \
        pd.merge(subtours, persons_merged.to_frame(),
                 left_on='person_id', right_index=True, how='left')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d subtours" % (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type, value_counts=True)

    # setup skim keys
    orig_col_name = 'workplace_taz'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices = run_tour_mode_choice_simulate(
        subtours_merged,
        spec, tour_purpose='atwork', model_settings=model_settings,
        skims=skims,
        constants=constants,
        nest_spec=nest_spec,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label, choices, value_counts=True)

    assign_in_place(tours, choices.to_frame('tour_mode'))
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
def tour_mode_choice_simulate(tours, persons_merged, network_los, chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'  # FIXME - should be passed in?

    primary_tours = tours.to_frame()
    assert not (primary_tours.tour_category == 'atwork').any()

    logger.info("Running %s with %d tours" %
                (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type,
                          value_counts=True)

    persons_merged = persons_merged.to_frame()
    primary_tours_merged = pd.merge(primary_tours,
                                    persons_merged,
                                    left_on='person_id',
                                    right_index=True,
                                    how='left',
                                    suffixes=('', '_r'))

    constants = {}
    # model_constants can appear in expressions
    constants.update(config.get_model_constants(model_settings))

    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    orig_col_name = 'home_zone_id'
    dest_col_name = 'destination'

    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims":
        odr_skim_stack_wrapper,  # dot return skims for e.g. TNC bridge return fare
        "dor_skims":
        dor_skim_stack_wrapper,  # odt return skims for e.g. TNC bridge return fare
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?

        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name,
                                           dest_key=dest_col_name,
                                           tod_key='out_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name,
                                           dest_key=orig_col_name,
                                           tod_key='in_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('tour_mode_choice')
    if estimator:
        estimator.write_coefficients(
            simulate.read_model_coefficients(model_settings))
        estimator.write_coefficients_template(
            simulate.read_model_coefficient_template(model_settings))
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # (run_tour_mode_choice_simulate writes choosers post-annotation)

    # FIXME should normalize handling of tour_type and tour_purpose
    # mtctm1 school tour_type includes univ, which has different coefficients from elementary and HS
    # we should either add this column when tours created or add univ to tour_types
    not_university = \
        (primary_tours_merged.tour_type != 'school') | ~primary_tours_merged.is_university
    primary_tours_merged['tour_purpose'] = \
        primary_tours_merged.tour_type.where(not_university, 'univ')

    choices_list = []
    for tour_purpose, tours_segment in primary_tours_merged.groupby(
            'tour_purpose'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" % (
            tour_purpose,
            len(tours_segment.index),
        ))

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(tour_purpose)
            tvpb_logsum_dot.extend_trace_label(tour_purpose)

        # name index so tracing knows how to slice
        assert tours_segment.index.name == 'tour_id'

        choices_df = run_tour_mode_choice_simulate(
            tours_segment,
            tour_purpose,
            model_settings,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            network_los=network_los,
            skims=skims,
            constants=constants,
            estimator=estimator,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_purpose),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices_df' %
                              tour_purpose,
                              choices_df.tour_mode,
                              value_counts=True)

        choices_list.append(choices_df)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_types in tvpb_mode_path_types.items():

            for direction, skim in zip(['od', 'do'],
                                       [tvpb_logsum_odt, tvpb_logsum_dot]):

                path_type = path_types[direction]
                skim_cache = skim.cache[path_type]

                logger.debug(
                    f"mode {mode} direction {direction} path_type {path_type}")

                for c in skim_cache:

                    dest_col = f'{direction}_{c}'

                    if dest_col not in choices_df:
                        choices_df[dest_col] = \
                            0 if pd.api.types.is_numeric_dtype(skim_cache[c]) else ''
                    choices_df[dest_col].where(choices_df.tour_mode != mode,
                                               skim_cache[c], inplace=True)

    if estimator:
        estimator.write_choices(choices_df.tour_mode)
        choices_df.tour_mode = estimator.get_survey_values(
            choices_df.tour_mode, 'tours', 'tour_mode')
        estimator.write_override_choices(choices_df.tour_mode)
        estimator.end_estimation()

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices_df.tour_mode,
                          value_counts=True)

    # so we can trace with annotations
    assign_in_place(primary_tours, choices_df)

    # update tours table with mode choice (and optionally logsums)
    all_tours = tours.to_frame()
    assign_in_place(all_tours, choices_df)

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(
                             trace_label, mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
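The tap-choice copy loop above leans on pandas Series.where, which keeps the existing value where the condition is True and substitutes the cached skim value where it is False. A minimal, self-contained sketch of that conditional-copy pattern, using made-up mode names and cache columns rather than the actual TVPB cache structure:

import pandas as pd

# hypothetical choices and cached tap-choice columns, indexed by tour_id
choices_df = pd.DataFrame(
    {'tour_mode': ['WALK_TRANSIT', 'DRIVE', 'WALK_TRANSIT']},
    index=pd.Index([1, 2, 3], name='tour_id'))
skim_cache = pd.DataFrame(
    {'btap': [101, 102, 103], 'atap': [201, 202, 203]},
    index=choices_df.index)

mode = 'WALK_TRANSIT'
for c in skim_cache:
    dest_col = f'od_{c}'
    if dest_col not in choices_df:
        # numeric columns default to 0, others to empty string
        choices_df[dest_col] = 0 if pd.api.types.is_numeric_dtype(skim_cache[c]) else ''
    # keep the default where the tour is NOT the tvpb mode; copy the cached value where it is
    choices_df[dest_col].where(choices_df.tour_mode != mode, skim_cache[c], inplace=True)

print(choices_df)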
Exemple #22
0
def meta_control_factoring(settings, control_spec, incidence_table):
    """
    Apply simple factoring to summed household fractional weights based on original
    meta control values relative to summed household fractional weights by meta zone.

    The resulting factored meta control weights will be new meta controls appended as
    additional columns to the seed control table, for final balancing.

    Parameters
    ----------
    settings : dict (settings.yaml as dict)
    control_spec : pipeline table
    incidence_table : pipeline table

    Returns
    -------

    """

    # FIXME - if there is only one seed zone in the meta zone, just copy meta control values?

    incidence_df = incidence_table.to_frame()
    control_spec = control_spec.to_frame()

    geographies = settings.get('geographies')
    seed_geography = settings.get('seed_geography')
    meta_geography = geographies[0]

    # - if there are no meta controls, then we don't have to do anything
    if not (control_spec.geography == meta_geography).any():
        logger.warning("meta_control_factoring: no meta targets so nothing to do")
        return

    meta_controls_df = get_control_table(meta_geography)
    dump_table("meta_controls_df", meta_controls_df)

    # slice control_spec to select only the rows for meta level controls
    meta_controls_spec = control_spec[control_spec.geography == meta_geography]
    meta_control_targets = meta_controls_spec['target']

    logger.info("meta_control_factoring %s targets" % len(meta_control_targets))

    dump_table("meta_controls_spec", meta_controls_spec)
    dump_table("meta_control_targets", meta_control_targets)

    # seed level weights of all households (rows aligned with incidence_df rows)
    seed_weights_df = get_weight_table(seed_geography)
    assert len(incidence_df.index) == len(seed_weights_df.index)

    # expand person weights by incidence (incidence will simply be 1 for household targets)
    hh_level_weights = incidence_df[[seed_geography, meta_geography]].copy()
    for target in meta_control_targets:
        hh_level_weights[target] = \
            incidence_df[target] * seed_weights_df['preliminary_balanced_weight']

    dump_table("hh_level_weights", hh_level_weights)

    # weights of meta targets at seed level
    factored_seed_weights = \
        hh_level_weights.groupby([seed_geography, meta_geography], as_index=False).sum()
    factored_seed_weights.set_index(seed_geography, inplace=True)
    dump_table("factored_seed_weights", factored_seed_weights)

    # weights of meta targets summed from seed level to meta level
    factored_meta_weights = factored_seed_weights.groupby(meta_geography, as_index=True).sum()
    dump_table("factored_meta_weights", factored_meta_weights)

    # only the meta level controls from meta_controls table
    meta_controls_df = meta_controls_df[meta_control_targets]
    dump_table("meta_controls_df", meta_controls_df)

    # compute the scaling factors to be applied to the seed-level totals:
    meta_factors = pd.DataFrame(index=meta_controls_df.index)
    for target in meta_control_targets:
        meta_factors[target] = meta_controls_df[target] / factored_meta_weights[target]
    dump_table("meta_factors", meta_factors)

    # compute seed-level controls from meta-level controls
    seed_level_meta_controls = pd.DataFrame(index=factored_seed_weights.index)
    for target in meta_control_targets:
        #  meta level scaling_factor for this meta_control
        scaling_factor = factored_seed_weights[meta_geography].map(meta_factors[target])
        # scale the seed_level_meta_controls by meta_level scaling_factor
        seed_level_meta_controls[target] = factored_seed_weights[target] * scaling_factor
        # FIXME - why round scaled factored seed_weights to int prior to final seed balancing?
        seed_level_meta_controls[target] = seed_level_meta_controls[target].round().astype(int)
    dump_table("seed_level_meta_controls", seed_level_meta_controls)

    # create final balancing controls
    # add newly created seed_level_meta_controls to the existing set of seed level controls

    seed_controls_df = get_control_table(seed_geography)

    assert len(seed_controls_df.index) == len(seed_level_meta_controls.index)
    seed_controls_df = pd.concat([seed_controls_df, seed_level_meta_controls], axis=1)

    # ensure columns are in right order for orca-extended table
    seed_controls_df = seed_controls_df[control_spec.target]
    assert (seed_controls_df.columns == control_spec.target).all()

    dump_table("seed_controls_df", seed_controls_df)

    pipeline.replace_table(control_table_name(seed_geography), seed_controls_df)
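The factoring itself is just a ratio applied per meta zone: factor = meta control target / summed factored weights, then seed-level control = factored seed weight * mapped factor. A tiny numeric sketch of the same arithmetic, using hypothetical zone ids and a single target so the mapping from meta-level factor to seed-level control is easy to follow:

import pandas as pd

# hypothetical: two meta zones, three seed zones, one meta control target 'hh_inc_high'
factored_seed_weights = pd.DataFrame(
    {'META': [1, 1, 2], 'hh_inc_high': [40.0, 60.0, 80.0]},
    index=pd.Index([10, 11, 12], name='SEED'))
meta_controls = pd.Series({1: 120.0, 2: 100.0}, name='hh_inc_high')

# summed seed-level weights by meta zone -> {1: 100.0, 2: 80.0}
factored_meta_weights = factored_seed_weights.groupby('META')['hh_inc_high'].sum()

# meta-level scaling factor: control / summed weight -> {1: 1.2, 2: 1.25}
meta_factor = meta_controls / factored_meta_weights

# map the factor back to seed zones, scale, then round to int for final balancing
scaling = factored_seed_weights['META'].map(meta_factor)
seed_level_meta_control = (factored_seed_weights['hh_inc_high'] * scaling).round().astype(int)

print(seed_level_meta_control)  # SEED 10 -> 48, 11 -> 72, 12 -> 100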
Exemple #23
0
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec assigns a zero probability to
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) CHOOSE_MOST_INITIAL
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach.

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    For now we are choosing among these approaches with a manifest constant, but this could
    be made a model setting...

    """
    trace_label = "trip_scheduling"

    model_settings = config.read_model_settings('trip_scheduling.yaml')
    assert 'DEPART_ALT_BASE' in model_settings

    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = \
        reindex(pd.Series(list(range(tours.shape[0])), tours.index), trips_df.tour_id)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []
    i = 0
    while (i < max_iterations) and not trips_df.empty:

        i += 1
        last_iteration = (i == max_iterations)

        trace_label_i = tracing.extend_trace_label(trace_label, "i%s" % i)
        logger.info("%s scheduling %s trips", trace_label_i, trips_df.shape[0])

        choices = \
            run_trip_scheduling(
                trips_df,
                tours,
                probs_spec,
                model_settings,
                last_iteration=last_iteration,
                trace_hh_id=trace_hh_id,
                chunk_size=chunk_size,
                trace_label=trace_label_i)

        # boolean series of trips whose individual trip scheduling failed
        failed = choices.reindex(trips_df.index).isnull()
        logger.info("%s %s failed", trace_label_i, failed.sum())

        if not last_iteration:
            # boolean series of trips whose leg scheduling failed
            failed_cohorts = failed_trip_cohorts(trips_df, failed)
            trips_df = trips_df[failed_cohorts]
            choices = choices[~failed_cohorts]

        choices_list.append(choices)

    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)
    if choices.isnull().any():
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations" %
            (choices.isnull().sum(), trips_df.shape[0], i))

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
Exemple #24
0
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds a stop_frequency str column to the tours table.

    Creates a trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()

    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logger.info("%s running segment %s with %s chooser rows" %
                     (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if preprocessor_settings:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
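For reference, the stop_frequency alternative names encode the number of intermediate stops in each direction, and each leg gets one more trip than it has stops. A small sketch (not the actual process_trips implementation) of how an alternative like '2out_0in' expands into trip rows:

import re
import pandas as pd

def expand_tour(tour_id, stop_frequency):
    # parse e.g. '2out_0in' -> 2 outbound stops, 0 inbound stops
    out_stops, in_stops = (int(n) for n in
                           re.match(r'(\d+)out_(\d+)in', stop_frequency).groups())
    rows = []
    for outbound, stops in ((True, out_stops), (False, in_stops)):
        trip_count = stops + 1  # one more trip than intermediate stops
        for trip_num in range(1, trip_count + 1):
            rows.append({'tour_id': tour_id, 'outbound': outbound,
                         'trip_num': trip_num, 'trip_count': trip_count})
    return pd.DataFrame(rows)

print(expand_tour(tour_id=1, stop_frequency='2out_0in'))
# -> 3 outbound trips and 1 inbound trip, 4 trips in total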
Exemple #25
0
def cdap_simulate(persons_merged, persons, households,
                  cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions,
                  chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    trace_label = 'cdap'
    model_settings = config.read_model_settings('cdap.yaml')

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    cdap_interaction_coefficients = \
        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

    # specs are built just-in-time on demand and cached as injectables
    # prebuilding here allows us to write them to the output directory
    # (also when multiprocessing locutor might not see all household sizes)
    logger.info("Pre-building cdap specs")
    for hhsize in range(2, cdap.MAX_HHSIZE + 1):
        spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True)
        if inject.get_injectable('locutor', False):
            spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize), index=True)

    logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

    choices = cdap.run_cdap(
        persons=persons_merged,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    # - assign results to persons table and annotate
    persons = persons.to_frame()

    choices = choices.reindex(persons.index)
    persons['cdap_activity'] = choices.cdap_activity
    persons['cdap_rank'] = choices.cdap_rank

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    # - annotate households table
    households = households.to_frame()
    expressions.assign_columns(
        df=households,
        model_settings=model_settings.get('annotate_households'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
    pipeline.replace_table("households", households)

    tracing.print_summary('cdap_activity', persons.cdap_activity, value_counts=True)
    logger.info("cdap crosstabs:\n%s" %
                pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))

    if trace_hh_id:

        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
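CDAP's coordination comes from enumerating the joint activity-pattern alternatives for a household and scoring each with individual utilities plus interaction terms. A toy enumeration for a two-person household with the three activity codes, using invented utilities and a single made-up interaction term (a conceptual sketch, not cdap.run_cdap):

import itertools
import numpy as np

ACTIVITIES = ['M', 'N', 'H']  # Mandatory, Non-mandatory, Home

# hypothetical individual utilities per person and activity
indiv_utils = {
    1: {'M': 2.0, 'N': 0.5, 'H': 0.0},
    2: {'M': 1.0, 'N': 1.2, 'H': 0.0},
}
# hypothetical interaction bonus when both household members stay home together
both_home_bonus = 0.8

alternatives = []
for pattern in itertools.product(ACTIVITIES, repeat=len(indiv_utils)):
    utility = sum(indiv_utils[person][act]
                  for person, act in zip(indiv_utils, pattern))
    if all(act == 'H' for act in pattern):
        utility += both_home_bonus
    alternatives.append((''.join(pattern), utility))

# turn joint utilities into choice probabilities with a softmax (logit)
utils = np.array([u for _, u in alternatives])
probs = np.exp(utils) / np.exp(utils).sum()
for (pattern, _), p in zip(alternatives, probs):
    print(pattern, round(p, 3))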
Exemple #26
0
def cdap_simulate(persons_merged, persons, households, chunk_size,
                  trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    trace_label = 'cdap'
    model_settings = config.read_model_settings('cdap.yaml')
    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})
    cdap_indiv_spec = simulate.read_model_spec(
        file_name=model_settings['INDIV_AND_HHSIZE1_SPEC'])

    # Rules and coefficients for generating interaction specs for different household sizes
    cdap_interaction_coefficients = \
        pd.read_csv(config.config_file_path('cdap_interaction_coefficients.csv'), comment='#')
    """
    spec to compute/specify the relative proportions of each activity (M, N, H)
    that should be used to choose activities for additional household members not handled by CDAP
    This spec is handled much like an activitysim logit utility spec,
    EXCEPT that the values computed are relative proportions, not utilities
    (i.e. values are not exponentiated before being normalized to probabilities summing to 1.0)
    """
    cdap_fixed_relative_proportions = \
        simulate.read_model_spec(file_name=model_settings['FIXED_RELATIVE_PROPORTIONS_SPEC'])

    persons_merged = persons_merged.to_frame()

    # add household-based chunk_id so we can chunk all persons in a household together
    assert 'chunk_id' not in persons_merged.columns
    unique_household_ids = persons_merged.household_id.unique()
    household_chunk_ids = pd.Series(range(len(unique_household_ids)),
                                    index=unique_household_ids)
    persons_merged['chunk_id'] = reindex(household_chunk_ids,
                                         persons_merged.household_id)

    constants = config.get_model_constants(model_settings)

    cdap_interaction_coefficients = \
        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

    # specs are built just-in-time on demand and cached as injectables
    # prebuilding here allows us to write them to the output directory
    # (also when multiprocessing locutor might not see all household sizes)
    logger.info("Pre-building cdap specs")
    for hhsize in range(2, cdap.MAX_HHSIZE + 1):
        spec = cdap.build_cdap_spec(cdap_interaction_coefficients,
                                    hhsize,
                                    cache=True)
        if inject.get_injectable('locutor', False):
            spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize),
                        index=True)

    estimator = estimation.manager.begin_estimation('cdap')
    if estimator:
        estimator.write_model_settings(model_settings, 'cdap.yaml')
        estimator.write_spec(model_settings, tag='INDIV_AND_HHSIZE1_SPEC')
        estimator.write_spec(model_settings=model_settings,
                             tag='FIXED_RELATIVE_PROPORTIONS_SPEC')
        estimator.write_table(cdap_interaction_coefficients,
                              'interaction_coefficients',
                              index=False,
                              append=False)
        estimator.write_choosers(persons_merged)
        for hhsize in range(2, cdap.MAX_HHSIZE + 1):
            spec = cdap.get_cached_spec(hhsize)
            estimator.write_table(spec, 'spec_%s' % hhsize, append=False)

    logger.info("Running cdap_simulate with %d persons",
                len(persons_merged.index))

    choices = cdap.run_cdap(
        persons=persons_merged,
        person_type_map=person_type_map,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'cdap_activity')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - assign results to persons table and annotate
    persons = persons.to_frame()

    choices = choices.reindex(persons.index)
    persons['cdap_activity'] = choices

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_persons'))

    pipeline.replace_table("persons", persons)

    # - annotate households table
    households = households.to_frame()
    expressions.assign_columns(
        df=households,
        model_settings=model_settings.get('annotate_households'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_households'))
    pipeline.replace_table("households", households)

    tracing.print_summary('cdap_activity',
                          persons.cdap_activity,
                          value_counts=True)
    logger.info(
        "cdap crosstabs:\n%s" %
        pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))
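The chunk_id assignment above is a broadcast join: number the unique household ids 0..n-1 and map that numbering onto the person rows. A minimal sketch of the same idiom with a toy persons table (reindex here is assumed to behave like activitysim's helper, i.e. a Series lookup, so pandas' own map is used instead):

import pandas as pd

persons = pd.DataFrame(
    {'household_id': [100, 100, 205, 205, 205, 310]},
    index=pd.Index([1, 2, 3, 4, 5, 6], name='person_id'))

# number the unique households 0..n-1
unique_hh_ids = persons.household_id.unique()
household_chunk_ids = pd.Series(range(len(unique_hh_ids)), index=unique_hh_ids)

# broadcast onto persons: every person in the same household shares a chunk_id
persons['chunk_id'] = persons.household_id.map(household_chunk_ids)

print(persons)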
Exemple #27
0
def iterate_location_choice(
        model_settings,
        persons_merged, persons, households,
        skim_dict, skim_stack,
        estimator,
        chunk_size, trace_hh_id, locutor,
        trace_label):
    """
    iterate run_location_choice updating shadow pricing until convergence criteria satisfied
    or max_iterations reached.

    (If use_shadow_pricing not enabled, then just iterate once)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    skim_dict : skim.SkimDict
    skim_stack : skim.SkimStack
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
    adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
    adds annotations to persons table
    """

    # column containing segment id
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    # boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged_df = persons_merged.to_frame()

    persons_merged_df = persons_merged_df[persons_merged_df[chooser_filter_column]]

    persons_merged_df.sort_index(inplace=True)  # interaction_sample expects chooser index to be monotonic increasing

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations
    assert not (spc.use_shadow_pricing and estimator)

    logger.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices_df, save_sample_df = run_location_choice(
            persons_merged_df,
            skim_dict, skim_stack,
            shadow_price_calculator=spc,
            want_logsums=logsum_column_name is not None,
            want_sample_table=want_sample_table,
            estimator=estimator,
            model_settings=model_settings,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, 'i%s' % iteration))

        # choices_df is a pandas DataFrame with columns 'choice' and (optionally) 'logsum'
        if choices_df is None:
            break

        spc.set_choices(
            choices=choices_df['choice'],
            segment_ids=persons_merged_df[chooser_segment_column].reindex(choices_df.index))

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logger.info("%s converged after iteration %s" % (trace_label, iteration,))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)

    persons_df = persons.to_frame()

    # add the choice values to the dest_choice_column in persons dataframe
    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    # names for location choice and (optional) logsums columns
    NO_DEST_TAZ = -1
    persons_df[dest_choice_column_name] = \
        choices_df['choice'].reindex(persons_df.index).fillna(NO_DEST_TAZ).astype(int)

    # add the dest_choice_logsum column to persons dataframe
    if logsum_column_name:
        persons_df[logsum_column_name] = \
            choices_df['logsum'].reindex(persons_df.index).astype('float')

    if save_sample_df is not None:
        # might be None for tiny samples even if sample_table_name was specified
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        # lest they try to put school and workplace samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("dest choice sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

        pipeline.replace_table("persons", persons_df)

        if trace_hh_id:
            tracing.trace_df(persons_df,
                             label=trace_label,
                             warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    if logsum_column_name:
        tracing.print_summary(logsum_column_name, choices_df['logsum'], value_counts=True)

    return persons_df
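The shadow pricing loop is the standard "adjust, re-run, check fit" fixed-point iteration: run the location choice, compare modeled destination sizes against targets, update the prices, and stop once the fit is good enough or max_iterations is reached. A schematic, purely numeric sketch of that control flow with a hypothetical proportional update rule (not activitysim's shadow_pricing API):

# toy shadow-pricing loop: scale a price until modeled size matches a target
target_size = 100.0
modeled_size = 60.0
shadow_price = 1.0
max_iterations = 10

for iteration in range(1, max_iterations + 1):
    if iteration > 1:
        # update the price in proportion to the size mismatch (one common rule)
        shadow_price *= target_size / modeled_size

    # stand-in for run_location_choice: a more attractive zone draws more choosers
    modeled_size = 60.0 * shadow_price

    converged = abs(modeled_size - target_size) / target_size < 0.01
    print(f"iteration {iteration}: modeled {modeled_size:.1f}, price {shadow_price:.3f}")
    if converged:
        break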
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as for non_mandatory_tours except tour types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
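simple_simulate returns the positional index of the chosen alternative; the code above converts those positions to alternative names via the spec's column labels and then reindexes onto the full persons index, filling non-choosers with an empty string. A small sketch of those two steps with made-up alternative names and person ids:

import pandas as pd

# hypothetical spec columns (alternative names) and raw positional choices
alt_columns = pd.Index(['work1', 'work2', 'school1', 'work_and_school'])
raw_choices = pd.Series([0, 3, 2], index=pd.Index([7, 9, 12], name='person_id'))

# positional index -> alternative name
choices = pd.Series(alt_columns[raw_choices.values], index=raw_choices.index)

# only persons with cdap_activity == 'M' were choosers; backfill everyone else with ''
all_person_ids = pd.Index([7, 8, 9, 10, 12], name='person_id')
mandatory_tour_frequency = choices.reindex(all_person_ids).fillna('').astype(str)

print(mandatory_tour_frequency)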
def add_null_results(trace_label, tours):
    logger.info("Skipping %s: add_null_results" % trace_label)
    tours['composition'] = ''
    pipeline.replace_table("tours", tours)
Exemple #30
0
def tour_mode_choice_simulate(tours, persons_merged,
                              skim_dict, skim_stack,
                              chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type, value_counts=True)

    primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id',
                                    right_index=True, how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices_list = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        choices = run_tour_mode_choice_simulate(
            segment,
            spec, tour_type, model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    # uncomment to save annotations to table
    # assign_in_place(all_tours, annotations)
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
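The odt/dot/od wrappers are deferred lookups: once the chooser dataframe is set as their target, an expression like odt_skims['SOV_TIME'] resolves to one value per row using that row's origin, destination, and time period. A rough sketch of the idea with plain numpy arrays standing in for activitysim's skim objects (assumed structure, not the real SkimDict/SkimStack API):

import numpy as np
import pandas as pd

# hypothetical 3-zone travel time matrices, one per time period
skims_by_period = {
    'AM': np.array([[0, 10, 20], [10, 0, 15], [20, 15, 0]], dtype=float),
    'PM': np.array([[0, 12, 25], [12, 0, 18], [25, 18, 0]], dtype=float),
}

choosers = pd.DataFrame({
    'TAZ': [0, 1, 2],          # tour origin (home zone)
    'destination': [2, 0, 1],  # tour primary destination
    'out_period': ['AM', 'AM', 'PM'],
})

def odt_lookup(df, orig_col, dest_col, period_col):
    # per-row lookup: time from orig to dest in the row's outbound period
    values = [skims_by_period[p][o, d]
              for o, d, p in zip(df[orig_col], df[dest_col], df[period_col])]
    return pd.Series(values, index=df.index)

choosers['sov_time_out'] = odt_lookup(choosers, 'TAZ', 'destination', 'out_period')
print(choosers)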
def trip_mode_choice(trips, tours_merged, network_los, chunk_size,
                     trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in trip_mode_choice_coefficients.csv coefficient file.)

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips_df,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(
        trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({'ORIGIN': orig_col, 'DESTINATION': dest_col})

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col,
                                               dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col,
                                               dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col,
                                           dest_key=dest_col,
                                           tod_key='trip_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            # 'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby(
            'primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(
            trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" % (
            primary_purpose,
            len(trips_segment.index),
        ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(
            model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        expressions.annotate_preprocessors(trips_segment, locals_dict, skims,
                                           model_settings, segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients,
                                            estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[dest_col] = \
                        np.nan if pd.api.types.is_numeric_dtype(skim_cache[c]) else ''
                choices_df[dest_col].where(choices_df[mode_column_name] != mode,
                                           skim_cache[c], inplace=True)

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(
            choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()

    # update trips table with choices (and optionally logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes',
                          trips_merged.tour_mode,
                          value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name],
                          value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(
                             trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
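Segment-specific coefficients work by name substitution: the spec references coefficient labels, and each purpose segment supplies its own numeric values for those labels before the utilities are evaluated. A toy illustration of that substitution step, with invented labels and values (simplified; simulate.eval_coefficients also handles coefficient templates and estimation overrides):

import pandas as pd

# hypothetical spec: one row per utility term, one column per alternative,
# cells hold coefficient labels (or 0 where a term does not apply)
spec = pd.DataFrame(
    {'DRIVE': ['coef_ivt', 0], 'WALK': [0, 'coef_walk_dist']},
    index=['in_vehicle_time', 'walk_distance'])

# each purpose segment provides its own values for the labels
coefficients = {'work': {'coef_ivt': -0.025, 'coef_walk_dist': -0.60},
                'shopping': {'coef_ivt': -0.015, 'coef_walk_dist': -0.90}}

def eval_segment_spec(spec, segment_coefs):
    # replace labels with numbers; anything already numeric passes through
    return spec.apply(lambda col: col.map(
        lambda v: segment_coefs.get(v, v) if isinstance(v, str) else v))

print(eval_segment_spec(spec, coefficients['work']))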
def setup_data_structures(settings, configs_dir, households, persons):
    """
    Setup geographic correspondence (crosswalk), control sets, and incidence tables.

    Control tables for the target geographies should already have been read in by running
    input_pre_processor. Each zone control table contains one row per zone, with columns
    specifying the control field totals for that zone.

    This step reads in the global control file, which specifies which control fields
    in the control table should be used for balancing, along with their importance and the
    recipe (seed table and expression) for determining household incidence for that control.

    If the GROUP_BY_INCIDENCE_SIGNATURE setting is enabled, then incidence table rows are
    household group ids and an additional household_groups table is created mapping hh group ids
    to actual hh_ids.

    Parameters
    ----------
    settings: dict
        contents of settings.yaml as dict
    configs_dir: str
    households: pipeline table
    persons: pipeline table

    creates pipeline tables:
        crosswalk
        controls
        geography-specific controls
        incidence_table
        household_groups (if GROUP_BY_INCIDENCE_SIGNATURE setting is enabled)

    modifies tables:
        households
        persons

    """

    seed_geography = setting('seed_geography')

    households_df = households.to_frame()
    persons_df = persons.to_frame()

    crosswalk_df = build_crosswalk_table()
    inject.add_table('crosswalk', crosswalk_df)

    control_spec = read_control_spec(setting('control_file_name', 'controls.csv'), configs_dir)
    inject.add_table('control_spec', control_spec)

    geographies = settings['geographies']
    for g in geographies:
        controls = build_control_table(g, control_spec, crosswalk_df)
        inject.add_table(control_table_name(g), controls)

    households_df, persons_df = filter_households(households_df, persons_df, crosswalk_df)
    pipeline.replace_table('households', households_df)
    pipeline.replace_table('persons', persons_df)

    incidence_table = \
        build_incidence_table(control_spec, households_df, persons_df, crosswalk_df)

    incidence_table = add_geography_columns(incidence_table, households_df, crosswalk_df)

    # add sample_weight col to incidence table
    hh_weight_col = setting('household_weight_col')
    incidence_table['sample_weight'] = households_df[hh_weight_col]

    if setting('GROUP_BY_INCIDENCE_SIGNATURE') and not setting('NO_INTEGERIZATION_EVER', False):
        group_incidence_table, household_groups \
            = build_grouped_incidence_table(incidence_table, control_spec, seed_geography)

        inject.add_table('household_groups', household_groups)
        inject.add_table('incidence_table', group_incidence_table)
    else:
        inject.add_table('incidence_table', incidence_table)
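An incidence table has one row per household (or household group) and one column per control, where each cell counts how many times that household contributes to the control. A toy sketch of building such a table from control expressions, with a hypothetical spec layout (the real build_incidence_table evaluates expressions from control_spec against the seed households and persons tables):

import pandas as pd

households = pd.DataFrame(
    {'hh_size': [1, 3, 2], 'hh_income': [20000, 85000, 55000]},
    index=pd.Index([1, 2, 3], name='hh_id'))
persons = pd.DataFrame(
    {'hh_id': [1, 2, 2, 2, 3, 3], 'age': [34, 41, 39, 8, 70, 68]})

# hypothetical control spec: target name -> (seed table, expression)
control_spec = {
    'num_hh': ('households', '(hh_size > 0) * 1'),
    'hh_inc_low': ('households', '(hh_income < 30000) * 1'),
    'persons_65plus': ('persons', '(age >= 65) * 1'),
}

incidence = pd.DataFrame(index=households.index)
for target, (seed_table, expression) in control_spec.items():
    if seed_table == 'households':
        incidence[target] = households.eval(expression)
    else:
        # person-level controls are summed to the household level
        incidence[target] = persons.eval(expression).groupby(persons.hh_id).sum() \
            .reindex(households.index).fillna(0).astype(int)

print(incidence)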
def trip_purpose_and_destination(
        trips,
        tours_merged,
        chunk_size,
        trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if trips_df.failed.any():
            logger.info('trip_destination has already been run. Rerunning failed trips')
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" % (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
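flag_failed_trip_leg_mates enforces that a failed trip takes its whole tour leg with it, since the remaining trips on that leg no longer form a usable chain. A compact sketch of that propagation with a toy trips table (hypothetical column layout, not the actual helper):

import pandas as pd

trips = pd.DataFrame({
    'tour_id': [10, 10, 10, 10, 20, 20],
    'outbound': [True, True, False, False, True, False],
    'failed': [False, True, False, False, False, False],
}, index=pd.Index(range(1, 7), name='trip_id'))

# a leg is identified by (tour_id, outbound); if any trip on a leg failed, fail the whole leg
leg_failed = trips.groupby(['tour_id', 'outbound'])['failed'].transform('any')
trips['failed'] = leg_failed

print(trips)
# trips 1 and 2 (outbound leg of tour 10) are now both failed; the inbound leg and tour 20 are untouched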
Exemple #34
0
def tour_mode_choice_simulate(tours, persons_merged, skim_dict, skim_stack,
                              chunk_size, trace_hh_id):
    """
    Tour mode choice simulate
    """
    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" %
                (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type,
                          value_counts=True)

    primary_tours_merged = pd.merge(primary_tours,
                                    persons_merged,
                                    left_on='person_id',
                                    right_index=True,
                                    how='left',
                                    suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices_list = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" % (
            tour_type,
            len(segment.index),
        ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        choices = run_tour_mode_choice_simulate(
            segment,
            spec,
            tour_type,
            model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' %
                              tour_type,
                              choices,
                              value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices,
                          value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    # uncomment to save annotations to table
    # assign_in_place(all_tours, annotations)
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(
                             trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
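The per-segment pattern above (group the choosers by tour_type, simulate each segment separately, concatenate the choices, then write the single result column back onto the full tours table) can be illustrated with plain pandas. The sketch below uses a hypothetical choose_mode() in place of run_tour_mode_choice_simulate; the toy table, mode labels, and helper name are illustrative, not ActivitySim APIs.

import pandas as pd

# toy tours table; mode choice runs separately for each tour_type segment
tours = pd.DataFrame(
    {'tour_type': ['work', 'shopping', 'work', 'school']},
    index=pd.Index([1, 2, 3, 4], name='tour_id'))


def choose_mode(segment, tour_type):
    # hypothetical stand-in for run_tour_mode_choice_simulate:
    # return one mode choice per tour in the segment
    return pd.Series('WALK' if tour_type == 'work' else 'DRIVE',
                     index=segment.index)


choices_list = []
for tour_type, segment in tours.groupby('tour_type'):
    choices_list.append(choose_mode(segment, tour_type))

choices = pd.concat(choices_list)

# write the single result column back onto the full table, aligned on tour_id
tours['tour_mode'] = choices.reindex(tours.index)
print(tours)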
Exemple #35
0
def iterate_location_choice(
        model_settings,
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor,
        trace_label):
    """
    Iterate run_location_choice, updating shadow prices, until the convergence criteria
    are satisfied or max_iterations is reached.

    (If use_shadow_pricing is not enabled, only a single iteration is run.)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    households : injected table
    skim_dict : skim.SkimDict
    skim_stack : skim.SkimStack
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    persons_df : pandas.DataFrame
        persons table with the choice column model_settings['DEST_CHOICE_COLUMN_NAME'] added
        (the table is also written back to the pipeline, along with any annotations)
    """

    # column containing segment id
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    # boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    persons_merged_df = persons_merged.to_frame()

    persons_merged_df = persons_merged_df[persons_merged_df[chooser_filter_column]]

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations

    logger.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    choices = None
    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices = run_location_choice(
            persons_merged_df,
            skim_dict, skim_stack,
            spc,
            model_settings,
            chunk_size, trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, 'i%s' % iteration))

        choices_df = choices.to_frame('dest_choice')
        choices_df['segment_id'] = \
            persons_merged_df[chooser_segment_column].reindex(choices_df.index)

        spc.set_choices(choices_df)

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logger.info("%s converged after iteration %s" % (trace_label, iteration,))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    tracing.print_summary(dest_choice_column_name, choices, value_counts=True)

    persons_df = persons.to_frame()

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    NO_DEST_TAZ = -1
    persons_df[dest_choice_column_name] = \
        choices.reindex(persons_df.index).fillna(NO_DEST_TAZ).astype(int)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

        pipeline.replace_table("persons", persons_df)

        if trace_hh_id:
            tracing.trace_df(persons_df,
                             label=trace_label,
                             warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    return persons_df
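The iteration structure above (run the location choice, feed the choices to the shadow price calculator, update prices, stop when check_fit reports convergence or max_iterations is hit) is a simple fixed-point loop. Below is a toy, self-contained sketch of that control flow; ToyShadowPriceCalculator is an invented stand-in, not the real shadow_pricing implementation.

MAX_ITERATIONS = 10


class ToyShadowPriceCalculator(object):
    # stand-in for the shadow price calculator: a single scalar "price"
    # nudged toward a target until it is close enough

    def __init__(self, target, tol=0.01):
        self.use_shadow_pricing = True
        self.price = 0.0
        self.target = target
        self.tol = tol

    def update_shadow_prices(self):
        self.price += 0.5 * (self.target - self.price)

    def check_fit(self, iteration):
        return abs(self.target - self.price) < self.tol


spc = ToyShadowPriceCalculator(target=1.0)
for iteration in range(1, MAX_ITERATIONS + 1):
    if spc.use_shadow_pricing and iteration > 1:
        spc.update_shadow_prices()
    # run_location_choice(...) would go here, using spc's current prices
    if spc.use_shadow_pricing and spc.check_fit(iteration):
        print("converged after iteration %s" % iteration)
        break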
Exemple #36
0
def repop_setup_data_structures(configs_dir, households, persons):
    """
    Setup geographic correspondence (crosswalk), control sets, and incidence tables for repop run.

    A new lowest-level geography control table should already have been read in by rerunning
    input_pre_processor with a table_list override. The control table contains one row for
    each zone, with columns specifying the control field totals for that zone.

    This step reads in the repop control file, which specifies which control fields
    in the control table should be used for balancing, along with their importance and the
    recipe (seed table and expression) for determining household incidence for that control.

    Parameters
    ----------
    configs_dir : str
    households: pipeline table
    persons: pipeline table

    Returns
    -------

    """

    seed_geography = setting('seed_geography')
    geographies = setting('geographies')
    low_geography = geographies[-1]

    # replace crosswalk table
    crosswalk_df = build_crosswalk_table()
    pipeline.replace_table('crosswalk', crosswalk_df)

    # replace control_spec
    control_file_name = setting('repop_control_file_name', 'repop_controls.csv')
    control_spec = read_control_spec(control_file_name, configs_dir)

    # repop control spec should only specify controls for lowest level geography
    assert control_spec.geography.unique() == [low_geography]

    pipeline.replace_table('control_spec', control_spec)

    # build incidence_table with repop controls and households in repop zones
    # filter households (dropping any not in crosswalk) in order to build incidence_table
    # We DO NOT REPLACE households and persons as we need full tables to synthesize population
    # (There is no problem, however, with overwriting the incidence_table and household_groups
    # because the expand_households step has ALREADY created the expanded_household_ids table
    # for the original simulated population. )

    households_df = households.to_frame()
    persons_df = persons.to_frame()
    households_df, persons_df = filter_households(households_df, persons_df, crosswalk_df)
    incidence_table = build_incidence_table(control_spec, households_df, persons_df, crosswalk_df)
    incidence_table = add_geography_columns(incidence_table, households_df, crosswalk_df)
    # add sample_weight col to incidence table
    hh_weight_col = setting('household_weight_col')
    incidence_table['sample_weight'] = households_df[hh_weight_col]

    # rebuild control tables with only the low level controls (aggregated at higher levels)
    for g in geographies:
        controls = build_control_table(g, control_spec, crosswalk_df)
        pipeline.replace_table(control_table_name(g), controls)

    if setting('GROUP_BY_INCIDENCE_SIGNATURE') and not setting('NO_INTEGERIZATION_EVER', False):
        group_incidence_table, household_groups \
            = build_grouped_incidence_table(incidence_table, control_spec, seed_geography)

        pipeline.replace_table('household_groups', household_groups)
        pipeline.replace_table('incidence_table', group_incidence_table)
    else:
        pipeline.replace_table('incidence_table', incidence_table)
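When GROUP_BY_INCIDENCE_SIGNATURE is enabled, households whose incidence rows are identical are collapsed into groups so the balancer works on far fewer rows, with a household_groups table mapping each household back to its group. A rough, pandas-only sketch of that idea follows; it is not the populationsim implementation, and the column names and grouping scheme are illustrative.

import pandas as pd

# toy incidence table: one row per household, one column per control, plus sample_weight
incidence = pd.DataFrame(
    {'hh_size_1': [1, 0, 1], 'workers_2': [0, 1, 0], 'sample_weight': [80.0, 120.0, 95.0]},
    index=pd.Index([10, 11, 12], name='hh_id'))

control_cols = ['hh_size_1', 'workers_2']

# one row per distinct incidence signature, with the group's total sample weight
group_incidence = (incidence
                   .groupby(control_cols)['sample_weight']
                   .sum()
                   .reset_index())
group_incidence.index.name = 'group_id'

# map each household to its group so grouped results can be expanded back to households
signature = incidence[control_cols].apply(tuple, axis=1)
group_ids = {sig: gid for gid, sig in
             enumerate(group_incidence[control_cols].apply(tuple, axis=1))}
household_groups = signature.map(group_ids).rename('group_id').to_frame()

print(group_incidence)
print(household_groups)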