Example #1
def atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    add logsum column to existing atwork_subtour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in atwork_subtour_destination_sample, and computing the logsum of all the utilities

    +-----------+--------------+----------------+------------+----------------+
    | person_id | dest_TAZ     | rand           | pick_count | logsum (added) |
    +===========+==============+================+============+================+
    | 23750     |  14          | 0.565502716034 | 4          |  1.85659498857 |
    +-----------+--------------+----------------+------------+----------------+
    | 23750     | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-----------+--------------+----------------+------------+----------------+
    | ...       |              |                |            |                |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 14           | 0.972732479292 | 2          |  1.44009018355 |
    +-----------+--------------+----------------+------------+----------------+

    """

    trace_label = 'atwork_subtour_destination_logsums'

    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)

    # merge persons into tours
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    tour_purpose = 'atwork'
    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
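
The docstring above describes the logsum as the log of the summed exponentiated mode-choice utilities for each sampled (person, dest_TAZ) pair. A minimal numpy sketch of just that final reduction (the utility values are invented; in the example the real work happens inside logsum.compute_logsums):

import numpy as np

# hypothetical mode utilities for a single (person, dest_TAZ) pair
utilities = np.array([-1.2, 0.4, -0.7])  # e.g. drive, transit, walk

# the logsum is the log of the sum of the exponentiated utilities
mode_choice_logsum = np.log(np.exp(utilities).sum())
print(round(mode_choice_logsum, 2))  # 0.83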
Example #2
def atwork_subtour_destination_logsums(persons_merged, destination_sample,
                                       skim_dict, skim_stack, chunk_size,
                                       trace_hh_id):
    """
    add logsum column to existing atwork_subtour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in atwork_subtour_destination_sample, and computing the logsum of all the utilities

    +-----------+--------------+----------------+------------+----------------+
    | person_id | dest_TAZ     | rand           | pick_count | logsum (added) |
    +===========+==============+================+============+================+
    | 23750     |  14          | 0.565502716034 | 4          |  1.85659498857 |
    +-----------+--------------+----------------+------------+----------------+
    | 23750     | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-----------+--------------+----------------+------------+----------------+
    | ...       |              |                |            |                |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 14           | 0.972732479292 | 2          |  1.44009018355 |
    +-----------+--------------+----------------+------------+----------------+

    """

    trace_label = 'atwork_subtour_destination_logsums'

    model_settings = config.read_model_settings(
        'atwork_subtour_destination.yaml')
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged,
                                                   logsum_settings,
                                                   model_settings)

    # merge persons into tours
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    tour_purpose = 'atwork'
    logsums = logsum.compute_logsums(choosers, tour_purpose, logsum_settings,
                                     model_settings, skim_dict, skim_stack,
                                     chunk_size, trace_hh_id, trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
Example #3
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """

    model_name = 'mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity in managing these different segmentation schemes,
    # we conflate them by segmenting tour processing to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    choices = run_tour_scheduling(model_name, mandatory_tours, persons_merged,
                                  tdd_alts, tour_segment_col, chunk_size,
                                  trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
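
The 'univ' relabeling above hinges on pandas Series.where, which keeps each value where the condition is True and substitutes the second argument elsewhere. A toy illustration of that conflation step (invented data, not the real tours table):

import pandas as pd

tour_type = pd.Series(['work', 'school', 'school'])
is_university_tour = pd.Series([False, False, True])

# keep tour_type where NOT a university tour, otherwise relabel as 'univ'
mandatory_tour_seg = tour_type.where(~is_university_tour, 'univ')
print(mandatory_tour_seg.tolist())  # ['work', 'school', 'univ']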
Example #4
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts,
                                  tdd_non_mandatory_spec,
                                  non_mandatory_tour_scheduling_settings,
                                  chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    non_mandatory_tours = tours[tours.non_mandatory]

    logger.info("Running non_mandatory_tour_scheduling with %d tours" %
                len(tours))

    constants = config.get_model_constants(
        non_mandatory_tour_scheduling_settings)

    tdd_choices = vectorize_tour_scheduling(non_mandatory_tours,
                                            persons_merged,
                                            tdd_alts,
                                            tdd_non_mandatory_spec,
                                            constants=constants,
                                            chunk_size=chunk_size,
                                            trace_label=trace_label)

    # add tdd_choices columns to tours
    for c in tdd_choices.columns:
        tours.loc[tdd_choices.index, c] = tdd_choices[c]

    pipeline.replace_table("tours", tours)

    non_mandatory_tours = tours[tours.non_mandatory]

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Example #5
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    model_name = 'non_mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    tour_segment_col = None

    choices = run_tour_scheduling(model_name, non_mandatory_tours,
                                  persons_merged, tdd_alts, tour_segment_col,
                                  chunk_size, trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Example #6
def run_destination_logsums(
        tour_purpose,
        persons_merged,
        destination_sample,
        model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, trace_label):
    """
    add logsum column to existing tour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in destination_sample, and computing the logsum of all the utilities
    """

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)

    # merge persons into tours
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
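
The 'merge persons into tours' step pairs left_on with right_index, joining person attributes onto every sampled row while preserving the destination_sample index; that is what lets the computed logsums series be assigned straight back as a new column. A self-contained sketch of the index behavior (toy frames):

import pandas as pd

destination_sample = pd.DataFrame(
    {'person_id': [23750, 23750, 23751], 'dest_taz': [14, 16, 12]},
    index=[7, 8, 9])  # arbitrary sample-row ids
persons_merged = pd.DataFrame({'income': [50000, 60000]},
                              index=[23750, 23751])  # indexed by person_id

choosers = pd.merge(destination_sample, persons_merged,
                    left_on='person_id', right_index=True, how='left')

# the left frame's index survives the merge
assert (choosers.index == destination_sample.index).all()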
Example #7
def run_destination_logsums(
        tour_purpose,
        persons_merged,
        destination_sample,
        model_settings,
        network_los,
        chunk_size, trace_hh_id, trace_label):
    """
    add logsum column to existing tour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_zone_id) pair
    in destination_sample, and computing the logsum of all the utilities
    """

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)

    # merge persons into tours
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        network_los,
        chunk_size,
        trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
Example #8
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings = config.read_model_settings(
        'non_mandatory_tour_scheduling.yaml')

    model_spec = simulate.read_model_spec(
        file_name='tour_scheduling_nonmandatory.csv')
    segment_col = None  # no segmentation of model_spec

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours",
                len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged = \
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=non_mandatory_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    tdd_choices, timetable = vectorize_tour_scheduling(
        non_mandatory_tours,
        persons_merged,
        tdd_alts,
        model_spec,
        segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Example #9
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings_file_name = 'non_mandatory_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours",
                len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged = \
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=non_mandatory_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation(
        'non_mandatory_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    # - non_mandatory tour scheduling is not segmented by tour type
    spec_info = {'spec': model_spec, 'estimator': estimator}

    choices = vectorize_tour_scheduling(non_mandatory_tours,
                                        persons_merged,
                                        tdd_alts,
                                        timetable,
                                        tour_segments=spec_info,
                                        tour_segment_col=None,
                                        model_settings=model_settings,
                                        chunk_size=chunk_size,
                                        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in non_mandatory_tours.groupby('tour_num',
                                                               sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
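
The choices returned by vectorize_tour_scheduling are bare tdd alternative ids, which is why the code above merges them against tdd_alts to recover start, end, and duration. A toy version of that mapping (invented alternatives table):

import pandas as pd

# hypothetical tdd_alts: one row per departure/arrival alternative
tdd_alts = pd.DataFrame({'start': [5, 5, 6], 'end': [17, 18, 18]},
                        index=[0, 1, 2])
tdd_alts['duration'] = tdd_alts.end - tdd_alts.start

choices = pd.Series([2, 0], index=[101, 102])  # tour_id -> chosen alt id

# same left join as above: attach start/end/duration to each tour
tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')
print(tdd_choices.loc[101, ['start', 'end', 'duration']].tolist())  # [6, 18, 12]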
Example #10
def run_od_simulate(spec_segment_name, tours, od_sample, want_logsums,
                    model_settings, network_los, destination_size_terms,
                    estimator, chunk_size, trace_label):
    """
    run simulate OD choices on tour_od_sample annotated with mode_choice
    logsum to select a tour OD from sample alternatives
    """

    model_spec = simulate.spec_for_segment(model_settings,
                                           spec_id='SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    # merge persons into tours
    choosers = tours

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_simulate {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    if estimator:
        estimator.write_choosers(choosers)

    origin_col_name = model_settings['ORIG_COL_NAME']
    dest_col_name = model_settings['DEST_COL_NAME']
    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    origin_attr_cols = model_settings['ORIGIN_ATTR_COLS_TO_USE']

    alt_od_col_name = get_od_id_col(origin_col_name, dest_col_name)
    od_sample[alt_od_col_name] = create_od_id_col(od_sample, origin_col_name,
                                                  dest_col_name)

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    od_sample['size_term'] = \
        reindex(destination_size_terms.size_term, od_sample[alt_dest_col_name])

    # also have to add origin attribute columns
    lu = inject.get_table('land_use').to_frame(columns=origin_attr_cols)
    od_sample = pd.merge(od_sample,
                         lu,
                         left_on=origin_col_name,
                         right_index=True,
                         how='left')

    tracing.dump_df(DUMP, od_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is an origin ID
    # column and a destination ID columns in the alternatives table.
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')
    choices = interaction_sample_simulate(
        choosers,
        od_sample,
        spec=model_spec,
        choice_column=alt_od_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='origin_destination',
        estimator=estimator)

    if not want_logsums:
        choices = choices.to_frame('choice')

    choices = _get_od_cols_from_od_id(choices, origin_col_name, dest_col_name)

    return choices
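
get_od_id_col and create_od_id_col come from the model's OD utility helpers and pack the origin and destination ids into the single choice column that interaction_sample_simulate expects. A plausible minimal reimplementation, purely for illustration (hypothetical; not the library source):

import pandas as pd

def get_od_id_col(origin_col_name, dest_col_name):
    # assumed naming convention for the combined column
    return f'{origin_col_name}_{dest_col_name}'

def create_od_id_col(df, origin_col_name, dest_col_name):
    # encode each (origin, destination) pair as one string id
    return (df[origin_col_name].astype(str) + '_' +
            df[dest_col_name].astype(str))

od_sample = pd.DataFrame({'origin': [1, 2], 'destination': [10, 20]})
od_sample[get_od_id_col('origin', 'destination')] = \
    create_od_id_col(od_sample, 'origin', 'destination')
print(od_sample['origin_destination'].tolist())  # ['1_10', '2_20']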
Example #11
def atwork_subtour_scheduling(tours, persons_merged, tdd_alts, skim_dict,
                              chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(
        file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(subtours, constants, skims, model_settings,
                           trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids},
                                index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']],
                                      left_index=True,
                                      right_index=True)

    tdd_choices = vectorize_subtour_scheduling(parent_tours,
                                               subtours,
                                               persons_merged,
                                               tdd_alts,
                                               model_spec,
                                               model_settings,
                                               chunk_size=chunk_size,
                                               trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(
            DUMP,
            tt.tour_map(parent_tours,
                        subtours,
                        tdd_alts,
                        persons_id_col='parent_tour_id'), trace_label,
            'tour_map')
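
The parent_tours construction above boils the subtours down to one row per distinct parent work tour and attaches the parent's already-chosen tdd, so subtour scheduling can be constrained to the parent tour's time window. With toy ids (the layout matches the parent_tours sample shown in a later example):

import pandas as pd

tours = pd.DataFrame({'tdd': [26, 3, 7]},
                     index=[20973389, 44612864, 48954854])
subtours = pd.DataFrame({'parent_tour_id': [20973389.0, 20973389.0]})

parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
parent_tours = pd.DataFrame({'tour_id': parent_tour_ids},
                            index=parent_tour_ids)
parent_tours = parent_tours.merge(tours[['tdd']],
                                  left_index=True, right_index=True)
print(parent_tours)  # one row: tour_id 20973389, tdd 26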
Example #12
def run_destination_simulate(spec_segment_name, tours, persons_merged,
                             destination_sample, want_logsums, model_settings,
                             network_los, destination_size_terms, estimator,
                             chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    model_spec = simulate.spec_for_segment(model_settings,
                                           spec_id='SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    # FIXME - MEMORY HACK - only include columns actually used in spec (omit them pre-merge)
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    persons_merged = persons_merged[[
        c for c in persons_merged.columns if c in chooser_columns
    ]]
    tours = tours[[
        c for c in tours.columns if c in chooser_columns or c == 'person_id'
    ]]
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how='left')

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_simulate {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term, destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers
    # and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(choosers,
                                          destination_sample,
                                          spec=model_spec,
                                          choice_column=alt_dest_col_name,
                                          want_logsums=want_logsums,
                                          skims=skims,
                                          locals_d=locals_d,
                                          chunk_size=chunk_size,
                                          trace_label=trace_label,
                                          trace_choice_name='destination',
                                          estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    return choices
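
The monotonicity guard above avoids an unconditional sort: is_monotonic_increasing is a cheap pandas index property, so the sort only happens when the interaction simulation actually needs it. A minimal illustration:

import pandas as pd

choosers = pd.DataFrame({'x': [1, 2, 3]}, index=[30, 10, 20])
print(choosers.index.is_monotonic_increasing)  # False

choosers = choosers.sort_index()
print(choosers.index.is_monotonic_increasing)  # True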
Example #13
def school_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                            school_location_sample, configs_dir, chunk_size,
                            trace_hh_id):
    """
    add logsum column to existing school_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in school_location_sample, and computing the logsum of all the utilities

                                                   <added>
    PERID,  dest_TAZ, rand,            pick_count, logsum
    23750,  14,       0.565502716034,  4           1.85659498857
    23750,  16,       0.711135838871,  6           1.92315598631
    ...
    23751,  12,       0.408038878552,  1           2.40612135416
    23751,  14,       0.972732479292,  2           1.44009018355

    """

    trace_label = 'school_location_logsums'

    # extract logsums_spec from omnibus_spec
    # omnibus_spec = orca.get_injectable('tour_mode_choice_spec')
    # for tour_type in ['school', 'university']:
    #     logsums_spec = get_segment_and_unstack(omnibus_spec, tour_type)
    #     tracing.dump_df(DUMP, logsums_spec, trace_label, 'logsums_spec_%s' % tour_type)

    school_location_settings = config.read_model_settings(
        configs_dir, 'school_location.yaml')

    alt_col_name = school_location_settings["ALT_COL_NAME"]

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()

    logger.info("Running school_location_sample with %s rows" %
                len(school_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')

    logsums_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        logsums_spec = mode_choice_logsums_spec(configs_dir, school_type)

        choosers = school_location_sample[school_location_sample['school_type']
                                          == school_type]

        choosers = pd.merge(choosers,
                            persons_merged,
                            left_index=True,
                            right_index=True,
                            how="left")

        choosers['in_period'] = time_period_label(
            school_location_settings['IN_PERIOD'])
        choosers['out_period'] = time_period_label(
            school_location_settings['OUT_PERIOD'])

        # FIXME - should do this in expression file?
        choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                            choosers[alt_col_name])
        choosers['dest_density_index'] = reindex(land_use.density_index,
                                                 choosers[alt_col_name])

        tracing.dump_df(DUMP, choosers, trace_label,
                        '%s_choosers' % school_type)

        logsums = compute_logsums(choosers, logsums_spec, logsum_settings,
                                  skim_dict, skim_stack, alt_col_name,
                                  chunk_size, trace_hh_id, trace_label)

        logsums_list.append(logsums)

    logsums = pd.concat(logsums_list)

    # add_column series should have an index matching the table to which it is being added
    # logsums does, since school_location_sample was on the left side of the merge creating choosers
    orca.add_column("school_location_sample", "mode_choice_logsum", logsums)
Example #14
def atwork_subtour_destination_simulate(subtours, persons_merged,
                                        destination_sample, want_logsums,
                                        model_settings, skim_dict,
                                        destination_size_terms, estimator,
                                        chunk_size, trace_label):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge destination_size_terms columns into alt sample list
    alternatives = \
        pd.merge(destination_sample, destination_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location',
        estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    return choices
Example #15
def atwork_subtour_destination_simulate(tours,
                                        persons_merged,
                                        atwork_subtour_destination_sample,
                                        atwork_subtour_destination_spec,
                                        skim_dict,
                                        destination_size_terms,
                                        configs_dir,
                                        chunk_size,
                                        trace_hh_id):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    trace_label = 'atwork_subtour_destination_simulate'
    model_settings = inject.get_injectable('atwork_subtour_destination_settings')

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'subtour']
    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged.to_frame(),
                        left_on='person_id', right_index=True)

    alt_col_name = model_settings["ALT_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    atwork_subtour_destination_sample = atwork_subtour_destination_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()
    alternatives = \
        pd.merge(atwork_subtour_destination_sample, destination_size_terms,
                 left_on=alt_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    sample_pool_size = len(destination_size_terms.index)

    logger.info("Running atwork_subtour_destination_simulate with %d persons" % len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_col_name)

    locals_d = {
        'skims': skims,
        'sample_pool_size': float(sample_pool_size)
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=atwork_subtour_destination_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')

    tracing.print_summary('subtour destination', choices, describe=True)

    subtours['destination'] = choices

    results = expressions.compute_columns(
        df=subtours,
        model_settings='annotate_tours_with_dest',
        configs_dir=configs_dir,
        trace_label=trace_label)

    assign_in_place(tours, subtours[['destination']])
    assign_in_place(tours, results)

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label=trace_label,
                         columns=['destination'],
                         warn_if_empty=True)
Example #16
def atwork_subtour_destination_logsums(persons_merged,
                                       land_use,
                                       skim_dict, skim_stack,
                                       atwork_subtour_destination_sample,
                                       configs_dir,
                                       chunk_size,
                                       trace_hh_id):
    """
    add logsum column to existing atwork_subtour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in atwork_subtour_destination_sample, and computing the logsum of all the utilities

    +-------+--------------+----------------+------------+----------------+
    | PERID | dest_TAZ     | rand           | pick_count | logsum (added) |
    +=======+==============+================+============+================+
    | 23750 |  14          | 0.565502716034 | 4          |  1.85659498857 |
    +-------+--------------+----------------+------------+----------------+
    | 23750 | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-------+--------------+----------------+------------+----------------+
    | ...   |              |                |            |                |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 14           | 0.972732479292 | 2          |  1.44009018355 |
    +-------+--------------+----------------+------------+----------------+

    """

    trace_label = 'atwork_subtour_destination_logsums'
    model_settings = inject.get_injectable('atwork_subtour_destination_settings')

    logsums_spec = mode_choice_logsums_spec(configs_dir, 'work')

    alt_col_name = model_settings["ALT_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir, 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    atwork_subtour_destination_sample = atwork_subtour_destination_sample.to_frame()

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    # merge persons into tours
    choosers = pd.merge(atwork_subtour_destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    choosers['in_period'] = skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = skim_time_period_label(model_settings['OUT_PERIOD'])

    # FIXME - should do this in expression file?
    choosers['dest_topology'] = reindex(land_use.TOPOLOGY, choosers[alt_col_name])
    choosers['dest_density_index'] = reindex(land_use.density_index, choosers[alt_col_name])

    logger.info("Running atwork_subtour_destination_logsums with %s rows" % len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = compute_logsums(
        choosers, logsums_spec, logsum_settings,
        skim_dict, skim_stack, chooser_col_name, alt_col_name, chunk_size, trace_hh_id, trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved. logsums does have a
    # matching index, since atwork_subtour_destination_sample was on the left side of the de-dup merge
    inject.add_column("atwork_subtour_destination_sample", "mode_choice_logsum", logsums)
Example #17
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
Example #18
def run_destination_simulate(
        spec_segment_name,
        tours,
        persons_merged,
        destination_sample,
        model_settings,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    model_spec_file_name = model_settings['SPEC']
    model_spec = simulate.read_model_spec(file_name=model_spec_file_name)
    model_spec = model_spec[[spec_segment_name]]

    # merge persons into tours
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id', right_index=True, how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term, destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        destination_sample,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='destination')

    return choices
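
reindex here is ActivitySim's lookup helper rather than pandas DataFrame.reindex: for each row of the second series it returns the first series' value at that row's value, keeping the second series' index (a vectorized dictionary lookup). A minimal reimplementation for illustration, assuming those semantics:

import pandas as pd

def reindex(series1, series2):
    # look up series1 at the values of series2, keep series2's index
    return pd.Series(series1.loc[series2.values].values, index=series2.index)

size_terms = pd.Series([0.5, 2.0], index=[14, 16])  # size term per zone
sample = pd.DataFrame({'dest_taz': [16, 14, 16]}, index=[7, 8, 9])

sample['size_term'] = reindex(size_terms, sample.dest_taz)
print(sample['size_term'].tolist())  # [2.0, 0.5, 2.0]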
Example #19
def workplace_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                               workplace_location_sample, configs_dir,
                               chunk_size, trace_hh_id):
    """
    add logsum column to existing workplace_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in workplace_location_sample, and computing the logsum of all the utilities

                                                   <added>
    PERID,  dest_TAZ, rand,            pick_count, logsum
    23750,  14,       0.565502716034,  4           1.85659498857
    23750,  16,       0.711135838871,  6           1.92315598631
    ...
    23751,  12,       0.408038878552,  1           2.40612135416
    23751,  14,       0.972732479292,  2           1.44009018355

    """

    trace_label = 'workplace_location_logsums'

    logsums_spec = mode_choice_logsums_spec(configs_dir, 'work')

    workplace_location_settings = config.read_model_settings(
        configs_dir, 'workplace_location.yaml')

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    workplace_location_sample = workplace_location_sample.to_frame()

    logger.info("Running workplace_location_sample with %s rows" %
                len(workplace_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    choosers = pd.merge(workplace_location_sample,
                        persons_merged,
                        left_index=True,
                        right_index=True,
                        how="left")

    choosers['in_period'] = time_period_label(
        workplace_location_settings['IN_PERIOD'])
    choosers['out_period'] = time_period_label(
        workplace_location_settings['OUT_PERIOD'])

    # FIXME - should do this in expression file?
    choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                        choosers[alt_col_name])
    choosers['dest_density_index'] = reindex(land_use.density_index,
                                             choosers[alt_col_name])

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = compute_logsums(choosers, logsums_spec, logsum_settings,
                              skim_dict, skim_stack, alt_col_name, chunk_size,
                              trace_hh_id, trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved
    # logsums now does, since workplace_location_sample was on the left side of the de-dup merge
    orca.add_column("workplace_location_sample", "mode_choice_logsum", logsums)
Example #20
def atwork_subtour_scheduling(tours, persons_merged, tdd_alts,
                              tdd_subtour_spec,
                              atwork_subtour_scheduling_settings, configs_dir,
                              chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    constants = config.get_model_constants(atwork_subtour_scheduling_settings)

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'subtour']

    logger.info("Running atwork_subtour_scheduling with %d tours" %
                len(subtours))

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids},
                                index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']],
                                      left_index=True,
                                      right_index=True)
    """
    parent_tours
               tour_id   tdd
    20973389  20973389    26
    44612864  44612864     3
    48954854  48954854     7
    """

    tdd_choices = vectorize_subtour_scheduling(parent_tours,
                                               subtours,
                                               persons_merged,
                                               tdd_alts,
                                               tdd_subtour_spec,
                                               constants=constants,
                                               chunk_size=chunk_size,
                                               trace_label=trace_label)
    assign_in_place(subtours, tdd_choices)

    expressions.assign_columns(df=subtours,
                               model_settings='annotate_tours',
                               configs_dir=configs_dir,
                               trace_label=trace_label)

    assign_in_place(tours, subtours)
    pipeline.replace_table("tours", tours)

    tracing.dump_df(
        DUMP,
        tt.tour_map(parent_tours,
                    subtours,
                    tdd_alts,
                    persons_id_col='parent_tour_id'), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(subtours,
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Example No. 21
def workplace_location_simulate(persons_merged, workplace_location_sample,
                                workplace_location_spec,
                                workplace_location_settings, skim_dict,
                                destination_size_terms, chunk_size,
                                trace_hh_id):
    """
    Workplace location model on workplace_location_sample annotated with mode_choice logsum
    to select a work_taz from sample alternatives
    """

    # for now I'm going to generate a workplace location for everyone -
    # presumably it will not get used in downstream models for everyone -
    # whether it gets used should depend on CDAP and mandatory tour generation
    choosers = persons_merged.to_frame()

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    workplace_location_sample = workplace_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()
    alternatives = \
        pd.merge(workplace_location_sample, destination_size_terms,
                 left_on=alt_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, 'workplace_location_simulate',
                    'alternatives')

    constants = config.get_model_constants(workplace_location_settings)

    sample_pool_size = len(destination_size_terms.index)

    logger.info("Running workplace_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {'skims': skims, 'sample_pool_size': float(sample_pool_size)}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    tracing.dump_df(DUMP, choosers, 'workplace_location_simulate', 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=workplace_location_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_hh_id and 'workplace_location',
        trace_choice_name='workplace_location')

    # FIXME - no need to reindex since we didn't slice choosers
    # choices = choices.reindex(persons_merged.index)

    tracing.print_summary('workplace_taz', choices, describe=True)

    orca.add_column("persons", "workplace_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_workplace")

    if trace_hh_id:
        trace_columns = ['workplace_taz'] + orca.get_table('persons_workplace').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="workplace_location",
                         columns=trace_columns,
                         warn_if_empty=True)
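
# A hedged sketch (ToySkims is a made-up stand-in, not the real skim_dict wrapper API)
# of how names placed in locals_d surface inside '@' spec expressions: the expression
# body after the '@' prefix is evaluated as Python with locals_d in scope.
import numpy as np
import pandas as pd

class ToySkims(object):
    """stand-in skim wrapper: pairwise O-D lookup driven by chooser/alt TAZ columns"""
    def __init__(self, matrix, orig, dest):
        self.matrix, self.orig, self.dest = matrix, orig, dest

    def __getitem__(self, name):
        # a real wrapper would select the skim named `name`; the toy has only one
        return self.matrix[self.orig, self.dest]

df = pd.DataFrame({'TAZ': [0, 1], 'dest_TAZ': [1, 0]})
dist = np.array([[0.0, 2.5],
                 [2.5, 0.0]])
locals_d = {'skims': ToySkims(dist, df.TAZ.values, df.dest_TAZ.values)}

expression = "skims['DIST'] * 0.5"          # as it would appear after the '@' prefix
utilities = eval(expression, {}, locals_d)  # -> array([1.25, 1.25])
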
Example No. 22
def school_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                            school_location_sample, configs_dir, chunk_size,
                            trace_hh_id):
    """
    add logsum column to existing school_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in school_location_sample, and computing the logsum of all the utilities

    +-------+--------------+----------------+------------+----------------+
    | PERID | dest_TAZ     | rand           | pick_count | logsum (added) |
    +=======+==============+================+============+================+
    | 23750 | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-------+--------------+----------------+------------+----------------+
    | 23750 | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-------+--------------+----------------+------------+----------------+
    | ...   |              |                |            |                |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-------+--------------+----------------+------------+----------------+
    """

    trace_label = 'school_location_logsums'

    school_location_settings = config.read_model_settings(
        configs_dir, 'school_location.yaml')

    alt_col_name = school_location_settings["ALT_COL_NAME"]
    chooser_col_name = 'TAZ'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()

    logger.info("Running school_location_sample with %s rows" %
                len(school_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')

    logsums_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        logsums_spec = mode_choice_logsums_spec(configs_dir, school_type)

        choosers = school_location_sample[school_location_sample['school_type']
                                          == school_type]

        choosers = pd.merge(choosers,
                            persons_merged,
                            left_index=True,
                            right_index=True,
                            how="left")

        choosers['in_period'] = skim_time_period_label(
            school_location_settings['IN_PERIOD'])
        choosers['out_period'] = skim_time_period_label(
            school_location_settings['OUT_PERIOD'])

        # FIXME - should do this in expression file?
        choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                            choosers[alt_col_name])
        choosers['dest_density_index'] = reindex(land_use.density_index,
                                                 choosers[alt_col_name])

        tracing.dump_df(DUMP, choosers,
                        tracing.extend_trace_label(trace_label, school_type),
                        'choosers')

        logsums = compute_logsums(
            choosers, logsums_spec, logsum_settings, skim_dict, skim_stack,
            chooser_col_name, alt_col_name, chunk_size, trace_hh_id,
            tracing.extend_trace_label(trace_label, school_type))

        logsums_list.append(logsums)

    logsums = pd.concat(logsums_list)

    # add_column series should have an index matching the table to which it is being added
    # logsums does, since school_location_sample was on the left side of the merge creating choosers
    inject.add_column("school_location_sample", "mode_choice_logsum", logsums)
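
# A toy sketch (made-up data) of the segment-and-concat pattern above: per-segment
# results concatenated back together keep their original row labels, so label alignment
# lets them be added as a column of the un-segmented sample table.
import pandas as pd

sample = pd.DataFrame({'school_type': ['university', 'gradeschool', 'university'],
                       'dest_TAZ': [14, 16, 12]},
                      index=[101, 102, 103])

logsums_list = []
for school_type in ['university', 'highschool', 'gradeschool']:
    choosers = sample[sample.school_type == school_type]
    # stand-in for compute_logsums(): any per-row series indexed like the segment
    logsums_list.append(pd.Series(0.1 * choosers.dest_TAZ, index=choosers.index))

logsums = pd.concat(logsums_list)        # rows now grouped by segment: 101, 103, 102
sample['mode_choice_logsum'] = logsums   # label alignment restores table order
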
Example No. 23
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings = config.read_model_settings(
        'mandatory_tour_scheduling.yaml')
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [
        c for c in model_columns if c not in logsum_columns
    ]
    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    # - add primary_purpose column
    # mtctm1 segments the mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, and univ primary_purposes)
    # for simplicity in managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    segment_col = 'primary_purpose'
    if segment_col not in mandatory_tours:

        is_university_tour = \
            (mandatory_tours.tour_type == 'school') & \
            reindex(persons_merged.is_university, mandatory_tours.person_id)

        mandatory_tours['primary_purpose'] = \
            mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # - spec dict segmented by primary_purpose
    specs = model_settings.get('SPEC', {})
    segment_specs = {
        segment: simulate.read_model_spec(file_name=spec)
        for segment, spec in specs.items()
    }

    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    tdd_choices, timetable = vts.vectorize_tour_scheduling(
        mandatory_tours,
        persons_merged,
        tdd_alts,
        spec=segment_specs,
        segment_col=segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
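
# A toy sketch of the tour_type -> primary_purpose recode above: Series.where() keeps
# values where the condition is True and substitutes 'univ' where it is False.
import pandas as pd

tour_type = pd.Series(['work', 'school', 'school'], index=[1, 2, 3])
is_university_tour = pd.Series([False, False, True], index=[1, 2, 3])

primary_purpose = tour_type.where(~is_university_tour, 'univ')
# -> 1: 'work', 2: 'school', 3: 'univ'
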
Example No. 24
def atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    trace_label = 'atwork_subtour_destination_simulate'

    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination.csv')

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged,
                        left_on='person_id', right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge destination_size_terms columns into alt sample list
    alternatives = \
        pd.merge(destination_sample, destination_size_terms,
                 left_on=alt_dest_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')

    return choices
Example No. 26
def run_destination_simulate(spec_segment_name, tours, persons_merged,
                             destination_sample, model_settings, skim_dict,
                             destination_size_terms, chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """

    model_spec_file_name = model_settings['SPEC']
    model_spec = simulate.read_model_spec(file_name=model_spec_file_name)
    model_spec = model_spec[[spec_segment_name]]

    # merge persons into tours
    choosers = pd.merge(tours,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term, destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons",
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(choosers,
                                          destination_sample,
                                          spec=model_spec,
                                          choice_column=alt_dest_col_name,
                                          skims=skims,
                                          locals_d=locals_d,
                                          chunk_size=chunk_size,
                                          trace_label=trace_label,
                                          trace_choice_name='destination')

    return choices
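
# A minimal sketch of the reindex() helper pattern above (this reindex is a simplified
# stand-in, not the library implementation): map a zone-level attribute, indexed by TAZ,
# onto a sample table via its alternative-destination column.
import pandas as pd

def reindex(series, col):
    # look up `series` by the values of `col`, returning a result indexed like `col`
    return pd.Series(series.loc[col.values].values, index=col.index)

size_terms = pd.Series([10.0, 40.0, 25.0], index=[12, 14, 16])   # indexed by TAZ
sample = pd.DataFrame({'dest_TAZ': [14, 16, 14]}, index=[501, 502, 503])

sample['size_term'] = reindex(size_terms, sample.dest_TAZ)
# -> 501: 40.0, 502: 25.0, 503: 40.0
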
Example No. 27
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings_file_name = 'mandatory_tour_scheduling.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [
        c for c in model_columns if c not in logsum_columns
    ]
    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    # - add tour segmentation column
    # mtctm1 segments the mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, and univ primary_purposes)
    # for simplicity in managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # load specs
    spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})
    specs = {}
    estimators = {}
    for spec_segment_name, spec_settings in spec_segment_settings.items():

        # estimator for this tour_segment
        estimator = estimation.manager.begin_estimation(
            model_name='mandatory_tour_scheduling_%s' % spec_segment_name,
            bundle_name='mandatory_tour_scheduling')

        spec_file_name = spec_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(
            spec_segment_settings[spec_segment_name])
        specs[spec_segment_name] = simulate.eval_coefficients(
            model_spec, coefficients_df, estimator)

        if estimator:
            estimators[spec_segment_name] = estimator  # add to local list
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name)
            estimator.write_spec(spec_settings)
            estimator.write_coefficients(coefficients_df)

    # - spec dict segmented by primary_purpose
    tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
    tour_segments = {}
    for tour_segment_name, spec_segment_name in tour_segment_settings.items():
        tour_segments[tour_segment_name] = {
            'spec_segment_name': spec_segment_name,
            'spec': specs[spec_segment_name],
            'estimator': estimators.get(spec_segment_name),
        }

    timetable = inject.get_injectable("timetable")

    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    choices = vts.vectorize_tour_scheduling(mandatory_tours,
                                            persons_merged,
                                            tdd_alts,
                                            timetable,
                                            tour_segments=tour_segments,
                                            tour_segment_col=tour_segment_col,
                                            model_settings=model_settings,
                                            chunk_size=chunk_size,
                                            trace_label=trace_label)

    if estimators:
        # override choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():
            model_choices = choices[(
                mandatory_tours.tour_type == spec_segment_name)]

            # FIXME - vectorize_tour_scheduling used to call write_choices, but perhaps shouldn't
            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(
                model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in \
                mandatory_tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    timetable.replace_table()

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP, tt.tour_map(persons_merged, mandatory_tours,
                                      tdd_alts), trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
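
# A toy sketch of the tdd -> (start, end, duration) join above: choices hold tdd
# alternative ids, and merging against the tdd_alts table attaches the columns
# those ids encode.
import pandas as pd

tdd_alts = pd.DataFrame({'start': [5, 5, 6], 'end': [17, 18, 18]}, index=[0, 1, 2])
tdd_alts['duration'] = tdd_alts.end - tdd_alts.start

choices = pd.Series([2, 0], index=[901, 902])   # tour_id -> chosen tdd alt id
tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')
# tdd_choices now carries start/end/duration, indexed by tour_id
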
Example No. 28
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings_file_name = 'tour_scheduling_atwork.yaml'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        # we don't need to update timetable because subtours are scheduled inside work trip windows

    choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                           left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
Example No. 29
def school_location_simulate(persons_merged, school_location_sample,
                             school_location_spec, school_location_settings,
                             skim_dict, destination_size_terms, chunk_size,
                             trace_hh_id):
    """
    School location model on school_location_sample annotated with mode_choice logsum
    to select a school_taz from sample alternatives
    """

    choosers = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()

    trace_label = 'school_location_simulate'
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    constants = config.get_model_constants(school_location_settings)

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]
    tracing.dump_df(DUMP, choosers, 'school_location_simulate', 'choosers')

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]
        alts_segment = school_location_sample[
            school_location_sample['school_type'] == school_type]

        # alternatives are pre-sampled and annotated with logsums and pick_count
        # but we have to merge additional alt columns into alt sample list
        alts_segment = \
            pd.merge(alts_segment, destination_size_terms,
                     left_on=alt_col_name, right_index=True, how="left")

        tracing.dump_df(DUMP, alts_segment, trace_label,
                        '%s_alternatives' % school_type)

        choices = interaction_sample_simulate(
            choosers_segment,
            alts_segment,
            spec=school_location_spec[[school_type]],
            choice_column=alt_col_name,
            skims=skims,
            locals_d=locals_d,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, school_type),
            trace_choice_name='school_location')

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    choices = choices.reindex(persons_merged.index).fillna(-1).astype(int)

    tracing.dump_df(DUMP, choices, trace_label, 'choices')

    tracing.print_summary('school_taz', choices, describe=True)

    inject.add_column("persons", "school_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        trace_columns = ['school_taz'] + inject.get_table('persons_school').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
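
# A toy sketch of the backfill above: choices exist only for the school-going subset,
# so reindexing to the full persons index and coding missing rows as -1 yields a
# school_taz defined for everyone.
import pandas as pd

persons_index = pd.Index([1, 2, 3, 4], name='person_id')
choices = pd.Series([14, 16], index=[2, 4])   # only persons 2 and 4 attend school

school_taz = choices.reindex(persons_index).fillna(-1).astype(int)
# -> 1: -1, 2: 14, 3: -1, 4: 16
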
Example No. 30
def run_od_logsums(spec_segment_name, tours_merged_df, od_sample,
                   model_settings, network_los, estimator, chunk_size,
                   trace_hh_id, trace_label):
    """
    add logsum column to existing tour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample
    (person, OD_id) pair in od_sample, and computing the logsum of all the utilities
    """
    chunk_tag = 'tour_od.logsums'
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])
    origin_id_col = model_settings['ORIG_COL_NAME']
    dest_id_col = model_settings['DEST_COL_NAME']
    tour_od_id_col = get_od_id_col(origin_id_col, dest_id_col)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    tours_merged_df = \
        logsum.filter_chooser_columns(tours_merged_df, logsum_settings, model_settings)

    # merge ods into choosers table
    choosers = od_sample.join(tours_merged_df, how='left')
    choosers[tour_od_id_col] = (choosers[origin_id_col].astype(str) + '_' +
                                choosers[dest_id_col].astype(str))

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    # run trip mode choice to compute tour mode choice logsums
    if logsum_settings.get('COMPUTE_TRIP_MODE_CHOICE_LOGSUMS', False):

        pseudo_tours = choosers.copy()
        trip_mode_choice_settings = config.read_model_settings(
            'trip_mode_choice')

        # the tours_merged table doesn't yet have all the columns it needs (e.g.
        # home_zone_id), so in order to compute tour mode choice/trip mode choice logsums
        # in this step we have to pass all tour-level attributes in with the main trips
        # table. see trip_mode_choice.py L56-61 for more details.
        tour_cols_needed = trip_mode_choice_settings.get(
            'TOURS_MERGED_CHOOSER_COLUMNS', [])
        tour_cols_needed.append(tour_od_id_col)

        # from tour_mode_choice.py
        not_university = \
            (pseudo_tours.tour_type != 'school') | ~pseudo_tours.is_university
        pseudo_tours['tour_purpose'] = \
            pseudo_tours.tour_type.where(not_university, 'univ')

        pseudo_tours['stop_frequency'] = '0out_0in'
        pseudo_tours['primary_purpose'] = pseudo_tours['tour_purpose']
        choosers_og_index = choosers.index.name
        pseudo_tours.reset_index(inplace=True)
        pseudo_tours.index.name = 'unique_id'

        # we need dest_id_col to create the dest col in trips, but we must preserve the
        # tour dest as a separate column in the trips table because the trip mode choice
        # preprocessor can't get the tour dest from the tours table, since the tours
        # don't yet have ODs.
        stop_frequency_alts = inject.get_injectable('stop_frequency_alts')
        pseudo_tours['tour_destination'] = pseudo_tours[dest_id_col]
        trips = trip.initialize_from_tours(
            pseudo_tours, stop_frequency_alts,
            [origin_id_col, dest_id_col, 'tour_destination', 'unique_id'])
        outbound = trips['outbound']
        trips['depart'] = reindex(pseudo_tours.start, trips.unique_id)
        trips.loc[~outbound, 'depart'] = \
            reindex(pseudo_tours.end, trips.loc[~outbound, 'unique_id'])

        logsum_trips = pd.DataFrame()
        nest_spec = config.get_logit_model_settings(logsum_settings)

        # actual coefficients don't matter here; we just need them to load the nest structure
        coefficients = simulate.get_segment_coefficients(
            logsum_settings, pseudo_tours.iloc[0]['tour_purpose'])
        nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                    trace_label)
        tour_mode_alts = []
        for nest in logit.each_nest(nest_spec):
            if nest.is_leaf:
                tour_mode_alts.append(nest.name)

        # repeat rows from the trips table iterating over tour mode
        for tour_mode in tour_mode_alts:
            trips['tour_mode'] = tour_mode
            logsum_trips = pd.concat((logsum_trips, trips), ignore_index=True)
        assert len(logsum_trips) == len(trips) * len(tour_mode_alts)
        logsum_trips.index.name = 'trip_id'

        for col in tour_cols_needed:
            if col not in trips:
                logsum_trips[col] = reindex(pseudo_tours[col],
                                            logsum_trips.unique_id)

        pipeline.replace_table('trips', logsum_trips)
        tracing.register_traceable_table('trips', logsum_trips)
        pipeline.get_rn_generator().add_channel('trips', logsum_trips)

        # run trip mode choice on pseudo-trips. use orca instead of pipeline to
        # execute the step because pipeline can only handle one open step at a time
        orca.run(['trip_mode_choice'])

        # grab trip mode choice logsums and pivot by tour mode and direction, index
        # on tour_id to enable merge back to choosers table
        trips = inject.get_table('trips').to_frame()
        trip_dir_mode_logsums = trips.pivot(index=['tour_id', tour_od_id_col],
                                            columns=['tour_mode', 'outbound'],
                                            values='trip_mode_choice_logsum')
        new_cols = [
            '_'.join(['logsum', mode, 'outbound' if outbound else 'inbound'])
            for mode, outbound in trip_dir_mode_logsums.columns
        ]
        trip_dir_mode_logsums.columns = new_cols

        choosers.reset_index(inplace=True)
        choosers.set_index(['tour_id', tour_od_id_col], inplace=True)
        choosers = pd.merge(choosers,
                            trip_dir_mode_logsums,
                            left_index=True,
                            right_index=True)
        choosers.reset_index(inplace=True)
        choosers.set_index(choosers_og_index, inplace=True)

        pipeline.get_rn_generator().drop_channel('trips')
        tracing.deregister_traceable_table('trips')

        assert (od_sample.index == choosers.index).all()
        for col in new_cols:
            od_sample[col] = choosers[col]

    logsums = logsum.compute_logsums(choosers, spec_segment_name,
                                     logsum_settings, model_settings,
                                     network_los, chunk_size, chunk_tag,
                                     trace_label, 'end', 'start', 'duration')

    assert (od_sample.index == logsums.index).all()
    od_sample['tour_mode_choice_logsum'] = logsums

    return od_sample
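
# A toy sketch (made-up logsums; needs pandas >= 1.1 for list-valued pivot columns) of
# the pivot-and-flatten above: trip-level logsums pivoted to one column per
# (tour_mode, direction) pair, then the MultiIndex columns collapsed to flat names.
import pandas as pd

trips = pd.DataFrame({
    'tour_id':   [7, 7, 7, 7],
    'tour_mode': ['WALK', 'WALK', 'DRIVE', 'DRIVE'],
    'outbound':  [True, False, True, False],
    'trip_mode_choice_logsum': [1.1, 1.3, 2.2, 2.4],
})

wide = trips.pivot(index='tour_id', columns=['tour_mode', 'outbound'],
                   values='trip_mode_choice_logsum')
wide.columns = ['_'.join(['logsum', mode, 'outbound' if outbound else 'inbound'])
                for mode, outbound in wide.columns]
# -> columns like 'logsum_WALK_outbound', 'logsum_DRIVE_inbound'
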