def trip_destination_simulate(
        primary_purpose,
        trips,
        destination_sample,
        model_settings,
        want_logsums,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Choose a destination for each trip from destination_sample
    (with od_logsum and dp_logsum columns added).

    Parameters
    ----------
    primary_purpose
        selects which 'DESTINATION_SPEC' is fetched via get_spec_for_purpose
    trips
        choosers handed to interaction_sample_simulate
    destination_sample
        alternatives handed to interaction_sample_simulate
    model_settings : dict
        must provide 'ALT_DEST_COL_NAME'
    want_logsums : bool
        forwarded to interaction_sample_simulate
    size_term_matrix
        made available to spec expressions under the name 'size_terms'
    skims
        merged into the expression locals with dict.update and also passed on as skims
    chunk_size
    trace_hh_id
        not used by this function
    trace_label : str

    Returns
    -------
    destinations - pandas.DataFrame
        'choice' column holds the destination alt chosen; rows whose choice equals
        NO_DESTINATION are dropped. When want_logsums is true the result of
        interaction_sample_simulate is returned as-is (apart from that filtering).
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_dest_simulate')

    dest_spec = get_spec_for_purpose(model_settings, 'DESTINATION_SPEC', primary_purpose)
    dest_col = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running trip_destination_simulate with %d trips", len(trips))

    # expression namespace: model constants first, then size terms, then the skims
    expr_locals = config.get_model_constants(model_settings).copy()
    expr_locals['size_terms'] = size_term_matrix
    expr_locals.update(skims)

    destinations = interaction_sample_simulate(
        choosers=trips,
        alternatives=destination_sample,
        spec=dest_spec,
        choice_column=dest_col,
        want_logsums=want_logsums,
        allow_zero_probs=True,
        zero_prob_choice_val=NO_DESTINATION,
        skims=skims,
        locals_d=expr_locals,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='trip_dest')

    if not want_logsums:
        # without logsums a bare Series comes back; wrap it so callers always
        # see a dataframe with the canonical 'choice' column
        assert isinstance(destinations, pd.Series)
        destinations = destinations.to_frame('choice')

    # zero-probability choosers were assigned NO_DESTINATION - remove them
    no_dest = destinations.choice == NO_DESTINATION
    if no_dest.any():
        destinations = destinations[~no_dest]

    return destinations
# Example #2
# 0
def run_location_simulate(
        segment_name,
        persons_merged,
        location_sample_df,
        skim_dict,
        dest_size_terms,
        model_settings,
        chunk_size, trace_label):
    """
    Run the location model on location_sample (annotated with mode_choice logsum)
    to select a dest zone from the sampled alternatives.

    Parameters
    ----------
    segment_name
        used to pick the spec segment and exposed to expressions as 'segment_size'
    persons_merged : pandas.DataFrame
        must not be empty; only SIMULATE_CHOOSER_COLUMNS are kept as choosers
    location_sample_df : pandas.DataFrame
        pre-sampled alternatives; size term columns are merged in here
    skim_dict
        wrapped on "TAZ_chooser" and the alt dest column
    dest_size_terms
        joined to the sample on its index
    model_settings : dict
    chunk_size
    trace_label : str

    Returns
    -------
    choices
        whatever interaction_sample_simulate returns for these choosers
    """
    assert not persons_merged.empty

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = persons_merged[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    dest_col = model_settings["ALT_DEST_COL_NAME"]

    # the sample already carries logsums and pick_count; attach the remaining
    # alternative attributes (size terms) by zone id
    alternatives = location_sample_df.merge(
        dest_size_terms, left_on=dest_col, right_index=True, how="left")

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    # skim wrapper keyed on the chooser TAZ and the alternative TAZ, which are
    # merged during interaction; available to @ expressions as "skims"
    skims = skim_dict.wrap("TAZ_chooser", dest_col)

    expr_locals = dict(skims=skims, segment_size=segment_name)
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        expr_locals.update(constants)

    return interaction_sample_simulate(
        choosers,
        alternatives,
        spec=spec_for_segment(model_spec, segment_name),
        choice_column=dest_col,
        skims=skims,
        locals_d=expr_locals,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name=model_settings['DEST_CHOICE_COLUMN_NAME'])
# Example #3
# 0
def trip_destination_simulate(
        primary_purpose,
        trips,
        destination_sample,
        model_settings,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):
    """
    Choose a destination for each trip from destination_sample
    (with od_logsum and dp_logsum columns added).

    Parameters
    ----------
    primary_purpose
        selects which 'DESTINATION_SPEC' is fetched via get_spec_for_purpose
    trips
        choosers handed to interaction_sample_simulate
    destination_sample
        alternatives handed to interaction_sample_simulate
    model_settings : dict
        must provide 'ALT_DEST'
    size_term_matrix
        made available to spec expressions under the name 'size_terms'
    skims
        merged into the expression locals with dict.update and also passed on as skims
    chunk_size
    trace_hh_id
        not used by this function
    trace_label : str

    Returns
    -------
    choices - pandas.Series
        destination alt chosen, with NO_DESTINATION entries removed
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_destination_simulate')

    dest_spec = get_spec_for_purpose(model_settings, 'DESTINATION_SPEC', primary_purpose)
    dest_col = model_settings["ALT_DEST"]

    logger.info("Running trip_destination_simulate with %d trips", len(trips))

    # expression namespace: model constants first, then size terms, then the skims
    expr_locals = config.get_model_constants(model_settings).copy()
    expr_locals['size_terms'] = size_term_matrix
    expr_locals.update(skims)

    destinations = interaction_sample_simulate(
        choosers=trips,
        alternatives=destination_sample,
        spec=dest_spec,
        choice_column=dest_col,
        allow_zero_probs=True,
        zero_prob_choice_val=NO_DESTINATION,
        skims=skims,
        locals_d=expr_locals,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='trip_dest')

    # zero-probability choosers were assigned NO_DESTINATION - remove them
    no_dest = destinations == NO_DESTINATION
    if no_dest.any():
        destinations = destinations[~no_dest]

    return destinations
# Example #4
# 0
def parking_destination_simulate(segment_name, trips, destination_sample,
                                 model_settings, skims, chunk_size,
                                 trace_hh_id, trace_label):
    """
    Choose a parking location for each trip from destination_sample.

    Parameters
    ----------
    segment_name
        selects which 'SPECIFICATION' is fetched via get_spec_for_segment
    trips
        choosers handed to interaction_sample_simulate
    destination_sample
        alternatives handed to interaction_sample_simulate
    model_settings : dict
        must provide 'ALT_DEST_COL_NAME'
    skims
        merged into the expression locals with dict.update and also passed on as skims
    chunk_size
    trace_hh_id
        not used by this function
    trace_label : str

    Returns
    -------
    choices - pandas.Series
        parking location alt chosen, with NO_DESTINATION entries removed
    """
    # NOTE(review): the trace label suffix and the info message below still say
    # 'trip_destination_simulate' - looks like a copy/paste from the trip
    # destination model; left untouched so trace output names do not change
    trace_label = tracing.extend_trace_label(
        trace_label, 'trip_destination_simulate')

    parking_spec = get_spec_for_segment(
        model_settings, 'SPECIFICATION', segment_name)
    dest_col = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running trip_destination_simulate with %d trips", len(trips))

    # expression namespace: model constants overlaid with the skims
    expr_locals = config.get_model_constants(model_settings).copy()
    expr_locals.update(skims)

    parking_locations = interaction_sample_simulate(
        choosers=trips,
        alternatives=destination_sample,
        spec=parking_spec,
        choice_column=dest_col,
        want_logsums=False,
        allow_zero_probs=True,
        zero_prob_choice_val=NO_DESTINATION,
        skims=skims,
        locals_d=expr_locals,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='parking_loc')

    # zero-probability choosers were assigned NO_DESTINATION - remove them
    failed = parking_locations == NO_DESTINATION
    if failed.any():
        logger.debug("dropping %s failed parking locations", failed.sum())
        parking_locations = parking_locations[~failed]

    return parking_locations
def workplace_location_simulate(persons_merged, workplace_location_sample,
                                workplace_location_spec,
                                workplace_location_settings, skim_dict,
                                destination_size_terms, chunk_size,
                                trace_hh_id):
    """
    Workplace location model: select a work_taz for each person from the
    workplace_location_sample alternatives (annotated with mode_choice logsum).

    The choices are not returned; they are registered as the 'workplace_taz'
    column of the orca "persons" table, and the dependent 'persons_workplace'
    columns are added through the pipeline.

    Parameters
    ----------
    persons_merged, workplace_location_sample, destination_size_terms
        table wrappers; each is materialised with .to_frame()
    workplace_location_spec
        spec passed to interaction_sample_simulate
    workplace_location_settings : dict
        must provide 'ALT_COL_NAME' and 'SIMULATE_CHOOSER_COLUMNS'
    skim_dict
        wrapped on "TAZ" and the alt column
    chunk_size
    trace_hh_id
        when truthy, tracing is enabled for this model
    """
    dump_label = 'workplace_location_simulate'

    # a workplace location is generated for everyone for now; whether it is
    # used downstream should depend on CDAP and mandatory tour generation
    persons_df = persons_merged.to_frame()

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # the sample already carries logsums and pick_count; attach the remaining
    # alternative attributes (size terms) by zone id
    sample_df = workplace_location_sample.to_frame()
    size_terms_df = destination_size_terms.to_frame()
    alternatives = sample_df.merge(
        size_terms_df, left_on=alt_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, dump_label, 'alternatives')

    constants = config.get_model_constants(workplace_location_settings)

    logger.info("Running workplace_location_simulate with %d persons",
                len(persons_df))

    # skim wrapper keyed on the chooser TAZ and the alternative TAZ, which are
    # merged during interaction; available to @ expressions as "skims"
    skims = skim_dict.wrap("TAZ", alt_col_name)

    expr_locals = {
        'skims': skims,
        'sample_pool_size': float(len(size_terms_df.index)),
    }
    if constants is not None:
        expr_locals.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = persons_df[
        workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']]

    tracing.dump_df(DUMP, choosers, dump_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=workplace_location_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=expr_locals,
        chunk_size=chunk_size,
        # a falsy trace_hh_id is passed through unchanged as the trace label
        trace_label=trace_hh_id and 'workplace_location',
        trace_choice_name='workplace_location')

    # choosers were not row-sliced, so choices needs no reindexing

    tracing.print_summary('workplace_taz', choices, describe=True)

    orca.add_column("persons", "workplace_taz", choices)
    pipeline.add_dependent_columns("persons", "persons_workplace")

    if trace_hh_id:
        trace_columns = \
            ['workplace_taz'] + orca.get_table('persons_workplace').columns
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="workplace_location",
                         columns=trace_columns,
                         warn_if_empty=True)
# Example #6
# 0
def _schedule_tours(tours, persons_merged, alts, spec, logsum_tour_purpose,
                    model_settings, skims, timetable, window_id_col,
                    previous_tour, tour_owner_id_col, estimator,
                    tour_trace_label):
    """
    Choose a tdd (time slot) alternative for each tour in one chunk of tours.

    previous_tour stores values used to add columns that can be used in the spec
    which have to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Side effects: previous_tour and timetable are both updated in place with
    the choices made here.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule with unique timetable window_id_col
    persons_merged : DataFrame
        DataFrame of persons to be merged with tours containing attributes referenced
        by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent all possible time slots.
        tdd_interaction_dataset function will use timetable to filter them to omit
        unavailable alternatives
    spec : DataFrame
        The spec which will be passed to interaction_sample_simulate.
    logsum_tour_purpose
        if truthy, logsums are computed with compute_logsums for this purpose and stored
        in the 'mode_choice_logsum' column of the alternatives; otherwise that column is 0
    model_settings : dict
    skims
        passed through to compute_logsums (only used when logsum_tour_purpose is truthy)
    timetable : TimeTable
        timetable of time windows for person (or subtour) with rows for tours[window_id_col]
    window_id_col : str
        column name from tours that identifies timetable owner (or None if tours index)
        - person_id for non/mandatory tours
        - parent_tour_id for subtours,
        - None (tours index) for joint_tours since every tour may have different participants)
    previous_tour: Series
        series with value of tdd_alt choice for the last previous tour scheduled for each
        tour owner (indexed by the values of tours[tour_owner_id_col])
    tour_owner_id_col : str
        column name from tours that identifies 'owner' of this tour
        (person_id for non/mandatory tours, parent_tour_id for subtours,
        household_id for joint_tours)
    estimator
        if truthy, choosers and interaction alternatives are written to it before simulation
        and it is also handed to interaction_sample_simulate
    tour_trace_label : str
        label used for logging, chunk logging and tracing

    Returns
    -------
    choices
        result of interaction_sample_simulate: the chosen tdd alternative per tour
        (presumably a Series indexed like tours - TODO confirm)
    """

    logger.info("%s schedule_tours running %d tour choices" %
                (tour_trace_label, len(tours)))

    # merge persons into tours
    # avoid dual suffix for redundant columns names (e.g. household_id) that appear in both
    tours = pd.merge(tours,
                     persons_merged,
                     left_on='person_id',
                     right_index=True,
                     suffixes=('', '_y'))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - add explicit window_id_col for timetable owner if it is index
    # if no timetable window_id_col specified, then add index as an explicit column
    # (this is not strictly necessary but its presence makes code simpler in several places)
    if window_id_col is None:
        window_id_col = tours.index.name
        tours[window_id_col] = tours.index

    # timetable can't handle multiple tours per window_id
    assert not tours[window_id_col].duplicated().any()

    # - build interaction dataset filtered to include only available tdd alts
    # dataframe columns start, end , duration, person_id, tdd
    # indexed (not unique) on tour_id
    choice_column = TDD_CHOICE_COLUMN
    alt_tdd = tdd_interaction_dataset(tours, alts, timetable, choice_column,
                                      window_id_col, tour_trace_label)
    chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    # - add logsums
    # (a scalar 0 is broadcast to every alternative when no logsum purpose is given)
    if logsum_tour_purpose:
        logsums = \
            compute_logsums(alt_tdd, tours, logsum_tour_purpose, model_settings, skims, tour_trace_label)
    else:
        logsums = 0
    alt_tdd['mode_choice_logsum'] = logsums

    # - merge in previous tour columns
    # adds start_previous and end_previous, joins on index
    tours = \
        tours.join(get_previous_tour_by_tourid(tours[tour_owner_id_col], previous_tour, alts))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - make choices
    # 'tt' exposes the timetable to spec expressions; model constants are layered on top
    locals_d = {'tt': timetable}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    if not RUN_ALTS_PREPROCESSOR_BEFORE_MERGE:
        # Note: Clint was running alts_preprocessor here on tdd_interaction_dataset instead of on raw (unmerged) alts
        # and he was using logsum_tour_purpose as selector, although logically it should be the spec_segment
        # It just happened to work for example_arc.mandatory_tour_scheduling because, in that model, (unlike semcog)
        # logsum_tour_purpose and spec_segments are aligned (both logsums and spec are segmented on work, school, univ)
        # In any case, I don't see any benefit to doing this here - at least not for any existing implementations
        # but if we do, it will require passing spec_segment to schedule_tours  and _schedule_tours
        # or redundantly segmenting alts (yuck!) to conform to more granular tour_segmentation (e.g. univ do school)
        spec_segment = logsum_tour_purpose  # FIXME this is not always right - see note above
        alt_tdd = run_alts_preprocessor(model_settings, alt_tdd, spec_segment,
                                        locals_d, tour_trace_label)
        chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(tours)
        estimator.set_alt_id(choice_column)
        estimator.write_interaction_sample_alternatives(alt_tdd)

    # chunk_size=0 is passed explicitly: tours arrive here already as one chunk
    choices = interaction_sample_simulate(tours,
                                          alt_tdd,
                                          spec,
                                          choice_column=choice_column,
                                          locals_d=locals_d,
                                          chunk_size=0,
                                          trace_label=tour_trace_label,
                                          estimator=estimator)

    # - update previous_tour and timetable parameters

    # update previous_tour (series with most recent previous tdd choices) with latest values
    # (positional assignment via .values: relies on choices being in the same row order as tours)
    previous_tour.loc[tours[tour_owner_id_col]] = choices.values

    # update timetable with chosen tdd footprints
    timetable.assign(tours[window_id_col], choices)

    return choices
# Example #7
# 0
def run_od_simulate(spec_segment_name, tours, od_sample, want_logsums,
                    model_settings, network_los, destination_size_terms,
                    estimator, chunk_size, trace_label):
    """
    Simulate tour OD choices on od_sample (annotated with mode_choice logsum)
    to select a tour origin-destination pair from the sampled alternatives.

    Note that the od_sample passed in is modified in place: an OD id column and
    a 'size_term' column are added to it before it is merged with land use data.

    Parameters
    ----------
    spec_segment_name
        segment used to pick the 'SPEC' via simulate.spec_for_segment
    tours : pandas.DataFrame
        choosers; only SIMULATE_CHOOSER_COLUMNS are kept
    od_sample : pandas.DataFrame
        pre-sampled OD alternatives
    want_logsums : bool
        forwarded to interaction_sample_simulate
    model_settings : dict
    network_los
        source of the default skim dict
    destination_size_terms
        its size_term column is looked up by alternative destination
    estimator
        if truthy, choosers are written to it; also forwarded to the simulation
    chunk_size
    trace_label : str

    Returns
    -------
    choices
        result of _get_od_cols_from_od_id applied to a frame with a 'choice' column
    """
    model_spec = simulate.spec_for_segment(model_settings,
                                           spec_id='SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = tours[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_simulate {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    if estimator:
        estimator.write_choosers(choosers)

    orig_col = model_settings['ORIG_COL_NAME']
    dest_col = model_settings['DEST_COL_NAME']
    alt_dest_col = model_settings['ALT_DEST_COL_NAME']
    origin_attr_cols = model_settings['ORIGIN_ATTR_COLS_TO_USE']

    # single id column identifying each origin/destination pair
    od_id_col = get_od_id_col(orig_col, dest_col)
    od_sample[od_id_col] = create_od_id_col(od_sample, orig_col, dest_col)

    # the sample already carries logsums and pick_count; add the size term of
    # each alternative destination
    od_sample['size_term'] = reindex(
        destination_size_terms.size_term, od_sample[alt_dest_col])

    # origin attribute columns come from the land_use table, joined on origin
    land_use = inject.get_table('land_use').to_frame(columns=origin_attr_cols)
    od_sample = pd.merge(
        od_sample, land_use, left_on=orig_col, right_index=True, how='left')

    tracing.dump_df(DUMP, od_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons",
                len(choosers))

    # skim wrapper keyed on the origin and destination id columns of the
    # alternatives table; available to @ expressions as "skims"
    skims = network_los.get_default_skim_dict().wrap(orig_col, dest_col)

    expr_locals = dict(skims=skims)
    if constants is not None:
        expr_locals.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')
    choices = interaction_sample_simulate(
        choosers,
        od_sample,
        spec=model_spec,
        choice_column=od_id_col,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=expr_locals,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='origin_destination',
        estimator=estimator)

    if not want_logsums:
        choices = choices.to_frame('choice')

    return _get_od_cols_from_od_id(choices, orig_col, dest_col)
# Example #8
# 0
def atwork_subtour_destination_simulate(subtours, persons_merged,
                                        destination_sample, skim_dict,
                                        destination_size_terms, chunk_size,
                                        trace_hh_id):
    """
    atwork_subtour_destination model: select a destination for each subtour
    from destination_sample (annotated with mode_choice logsum).

    Settings are read from 'atwork_subtour_destination.yaml' and the spec from
    'atwork_subtour_destination.csv'.

    Parameters
    ----------
    subtours : pandas.DataFrame
        sorted by index and merged with persons_merged on 'person_id'
    persons_merged : pandas.DataFrame
    destination_sample
        pre-sampled alternatives
    skim_dict
        wrapped on 'workplace_taz' and the alt dest column
    destination_size_terms
        joined to the sample on its index
    chunk_size
    trace_hh_id
        not used by this function

    Returns
    -------
    choices
        whatever interaction_sample_simulate returns for these choosers
    """
    trace_label = 'atwork_subtour_destination_simulate'

    model_settings = \
        config.read_model_settings('atwork_subtour_destination.yaml')
    model_spec = \
        simulate.read_model_spec(file_name='atwork_subtour_destination.csv')

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # choosers are subtours with person attributes attached
    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = pd.merge(
        subtours, persons_merged, left_on='person_id', right_index=True)
    choosers = choosers[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    dest_col = model_settings["ALT_DEST_COL_NAME"]
    origin_col = 'workplace_taz'

    # the sample already carries logsums and pick_count; attach the
    # destination_size_terms columns by zone id
    alternatives = pd.merge(
        destination_sample, destination_size_terms,
        left_on=dest_col, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons",
                len(choosers))

    # skim wrapper keyed on the chooser TAZ and the alternative TAZ, which are
    # merged during interaction; available to @ expressions as "skims"
    skims = skim_dict.wrap(origin_col, dest_col)

    expr_locals = dict(skims=skims)
    if constants is not None:
        expr_locals.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    return interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=dest_col,
        skims=skims,
        locals_d=expr_locals,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')
def run_location_simulate(segment_name, persons_merged, location_sample_df,
                          network_los, dest_size_terms, want_logsums,
                          estimator, model_settings, chunk_size, chunk_tag,
                          trace_label):
    """
    Run the location model on location_sample (annotated with mode_choice logsum)
    to select a dest zone from the sampled alternatives.

    Parameters
    ----------
    segment_name
        used to pick the spec segment and exposed to expressions as 'segment_size'
    persons_merged : pandas.DataFrame
        must not be empty; only SIMULATE_CHOOSER_COLUMNS are kept as choosers
    location_sample_df : pandas.DataFrame
        pre-sampled alternatives; size term columns are merged in here
    network_los
        source of the default skim dict
    dest_size_terms
        joined to the sample on its index
    want_logsums : bool
        forwarded to interaction_sample_simulate
    estimator
        if truthy, choosers and alternatives are written to it
    model_settings : dict
    chunk_size, chunk_tag
    trace_label : str

    Returns
    -------
    choices : pandas.DataFrame
        choice : location choices (zone ids)
        logsum : present only when want_logsums is requested
    """
    assert not persons_merged.empty

    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = persons_merged[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    dest_col = model_settings['ALT_DEST_COL_NAME']

    # the sample already carries logsums and pick_count; attach the remaining
    # alternative attributes (size terms) by zone id
    alternatives = location_sample_df.merge(
        dest_size_terms, left_on=dest_col, right_index=True, how="left")

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    # skim wrapper keyed on the chooser home_zone_id and the alternative zone id,
    # which are merged during interaction; available to @ expressions as "skims"
    skims = network_los.get_default_skim_dict().wrap('home_zone_id', dest_col)

    expr_locals = dict(skims=skims, segment_size=segment_name)
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        expr_locals.update(constants)

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(choosers)
        estimator.set_alt_id(dest_col)
        estimator.write_interaction_sample_alternatives(alternatives)

    segment_spec = simulate.spec_for_segment(model_settings,
                                             spec_id='SPEC',
                                             segment_name=segment_name,
                                             estimator=estimator)

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=segment_spec,
        choice_column=dest_col,
        log_alt_losers=config.setting('log_alt_losers', False),
        want_logsums=want_logsums,
        skims=skims,
        locals_d=expr_locals,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label,
        trace_choice_name=model_settings['DEST_CHOICE_COLUMN_NAME'],
        estimator=estimator)

    if not want_logsums:
        # without logsums a bare Series comes back; wrap it so callers always
        # see a dataframe with the canonical 'choice' column
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    assert isinstance(choices, pd.DataFrame)

    return choices
def atwork_subtour_destination_simulate(subtours, persons_merged,
                                        destination_sample, want_logsums,
                                        model_settings, skim_dict,
                                        destination_size_terms, estimator,
                                        chunk_size, trace_label):
    """
    atwork_subtour_destination model: select a destination for each subtour
    from destination_sample (annotated with mode_choice logsum).

    Parameters
    ----------
    subtours : pandas.DataFrame
        sorted by index and merged with persons_merged on 'person_id'
    persons_merged : pandas.DataFrame
    destination_sample
        pre-sampled alternatives
    want_logsums : bool
        forwarded to interaction_sample_simulate
    model_settings : dict
        provides 'SPEC', 'SIMULATE_CHOOSER_COLUMNS' and 'ALT_DEST_COL_NAME'
    skim_dict
        wrapped on 'workplace_taz' and the alt dest column
    destination_size_terms
        joined to the sample on its index
    estimator
        if truthy, choosers are written to it; also forwarded to the simulation
    chunk_size
    trace_label : str

    Returns
    -------
    choices
        a dataframe with a 'choice' column when want_logsums is false; otherwise
        the result of interaction_sample_simulate unchanged
    """
    # spec with coefficient values substituted in
    model_spec = simulate.eval_coefficients(
        simulate.read_model_spec(file_name=model_settings['SPEC']),
        simulate.read_model_coefficients(model_settings),
        estimator)

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # choosers are subtours with person attributes attached
    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = pd.merge(
        subtours, persons_merged, left_on='person_id', right_index=True)
    choosers = choosers[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    if estimator:
        estimator.write_choosers(choosers)

    dest_col = model_settings['ALT_DEST_COL_NAME']
    origin_col = 'workplace_taz'

    # the sample already carries logsums and pick_count; attach the
    # destination_size_terms columns by zone id
    alternatives = pd.merge(
        destination_sample, destination_size_terms,
        left_on=dest_col, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons",
                len(choosers))

    # skim wrapper keyed on the chooser TAZ and the alternative TAZ, which are
    # merged during interaction; available to @ expressions as "skims"
    skims = skim_dict.wrap(origin_col, dest_col)

    expr_locals = dict(skims=skims)
    if constants is not None:
        expr_locals.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=model_spec,
        choice_column=dest_col,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=expr_locals,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location',
        estimator=estimator)

    if not want_logsums:
        # without logsums a bare Series comes back; wrap it so callers always
        # see a dataframe with the canonical 'choice' column
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    return choices
def atwork_subtour_destination_simulate(tours,
                                        persons_merged,
                                        atwork_subtour_destination_sample,
                                        atwork_subtour_destination_spec,
                                        skim_dict,
                                        destination_size_terms,
                                        configs_dir,
                                        chunk_size,
                                        trace_hh_id):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives

    Nothing is returned: the chosen destinations and the columns computed by the
    'annotate_tours_with_dest' expressions are written into the tours frame,
    which then replaces the "tours" pipeline table.

    Parameters
    ----------
    tours, persons_merged, atwork_subtour_destination_sample, destination_size_terms
        table wrappers; each is materialised with .to_frame()
    atwork_subtour_destination_spec
        spec passed to interaction_sample_simulate
    skim_dict
        wrapped on 'workplace_taz' and the alt column
    configs_dir
        passed to expressions.compute_columns
    chunk_size
    trace_hh_id
        when truthy, the resulting tours 'destination' column is traced
    """

    trace_label = 'atwork_subtour_destination_simulate'
    model_settings = inject.get_injectable('atwork_subtour_destination_settings')

    tours = tours.to_frame()
    # explicit copy: a 'destination' column is assigned to subtours below, and
    # assigning to a boolean-mask slice of tours triggers pandas'
    # SettingWithCopyWarning
    subtours = tours[tours.tour_category == 'subtour'].copy()
    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged.to_frame(),
                        left_on='person_id', right_index=True)

    alt_col_name = model_settings["ALT_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    atwork_subtour_destination_sample = atwork_subtour_destination_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()
    alternatives = \
        pd.merge(atwork_subtour_destination_sample, destination_size_terms,
                 left_on=alt_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    # number of zones available for sampling, exposed to the spec as a float
    sample_pool_size = len(destination_size_terms.index)

    logger.info("Running atwork_subtour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, alt_col_name)

    locals_d = {
        'skims': skims,
        'sample_pool_size': float(sample_pool_size)
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=atwork_subtour_destination_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')

    tracing.print_summary('subtour destination', choices, describe=True)

    # aligned on index, so each subtour receives its own choice
    subtours['destination'] = choices

    results = expressions.compute_columns(
        df=subtours,
        model_settings='annotate_tours_with_dest',
        configs_dir=configs_dir,
        trace_label=trace_label)

    # copy the subtour results back into the full tours frame
    assign_in_place(tours, subtours[['destination']])
    assign_in_place(tours, results)

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label=trace_label,
                         columns=['destination'],
                         warn_if_empty=True)
def joint_tour_destination_simulate(joint_tours, households_merged,
                                    destination_sample, skim_dict,
                                    size_term_calculator, chunk_size,
                                    trace_hh_id):
    """
    Choose a joint tour destination from among the destination sample alternatives
    (annotated with logsums), one tour_type segment at a time.

    Parameters
    ----------
    joint_tours : pandas.DataFrame
        joint tours (the choosers); merged with households_merged on household_id
    households_merged : pandas.DataFrame
        household attributes, joined on its index
    destination_sample : pandas.DataFrame
        pre-sampled alternatives; must have a tour_type_id column and the column
        named by ALT_DEST_COL_NAME in joint_tour_destination.yaml
    skim_dict
        skim dictionary; its wrap() result is exposed to spec expressions as "skims"
    size_term_calculator
        provides dest_size_terms_series(tour_type) used to fill the size_term column
    chunk_size
        passed through to interaction_sample_simulate
    trace_hh_id
        not used by this function

    Returns
    -------
    choices
        pd.concat of the per-segment interaction_sample_simulate results
        (segments with no tours are skipped)
    """

    trace_label = 'joint_tour_destination_simulate'

    model_settings = config.read_model_settings('joint_tour_destination.yaml')

    # - tour types are subset of non_mandatory tour types and use same expressions
    model_spec = simulate.read_model_spec(
        file_name='non_mandatory_tour_destination.csv')

    # interaction_sample_simulate insists choosers appear in same order as alts
    joint_tours = joint_tours.sort_index()

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("Running joint_tour_destination_simulate with %d joint_tours",
                joint_tours.shape[0])

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    # choosers are tours - in a sense tours are choosing their destination
    choosers = pd.merge(joint_tours,
                        households_merged,
                        left_on='household_id',
                        right_index=True,
                        how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    choices_list = []
    # segment by trip type and pick the right spec for each person type
    # for tour_type, choosers_segment in choosers.groupby('tour_type'):
    for tour_type, tour_type_id in iteritems(TOUR_TYPE_ID):

        # expose the current segment name to spec expressions
        locals_d['segment'] = tour_type

        choosers_segment = choosers[choosers.tour_type == tour_type]

        # - skip empty segments
        if choosers_segment.shape[0] == 0:
            logger.info("%s skipping tour_type %s: no tours", trace_label,
                        tour_type)
            continue

        # take an explicit copy: pandas tracks the filtered frame as a slice of
        # destination_sample, so adding the size_term column to it directly
        # raises SettingWithCopyWarning
        alts_segment = destination_sample[
            destination_sample.tour_type_id == tour_type_id].copy()

        assert tour_type not in alts_segment

        # alternatives are pre-sampled and annotated with logsums and pick_count
        # but we have to merge size_terms column into alt sample list
        alts_segment['size_term'] = \
            reindex(size_term_calculator.dest_size_terms_series(tour_type),
                    alts_segment[alt_dest_col_name])

        logger.info("Running segment '%s' of %d joint_tours %d alternatives",
                    tour_type, len(choosers_segment), len(alts_segment))

        assert choosers_segment.index.is_monotonic_increasing
        assert alts_segment.index.is_monotonic_increasing

        choices = interaction_sample_simulate(
            choosers_segment,
            alts_segment,
            spec=model_spec[[tour_type]],
            choice_column=alt_dest_col_name,
            skims=skims,
            locals_d=locals_d,
            chunk_size=chunk_size,
            trace_label=trace_label,
            trace_choice_name='joint_tour_destination')

        choices_list.append(choices)

    # NOTE: pd.concat raises ValueError if every segment was skipped (empty list)
    choices = pd.concat(choices_list)

    return choices
Beispiel #13
0
def run_destination_simulate(
        spec_segment_name,
        tours,
        persons_merged,
        destination_sample,
        model_settings,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_label):
    """
    Pick a destination for each tour from the pre-sampled alternatives in
    destination_sample (already annotated with mode choice logsums).

    Only the spec_segment_name column of the spec file named by
    model_settings['SPEC'] is used. A size_term column is written into
    destination_sample in place before simulation.

    Returns
    -------
    choices
        result of interaction_sample_simulate, choosing on the column named by
        model_settings['ALT_DEST_COL_NAME']
    """

    # spec restricted to the single column for this segment
    spec_file = model_settings['SPEC']
    segment_spec = simulate.read_model_spec(file_name=spec_file)[[spec_segment_name]]

    # choosers are tours with their person's attributes attached
    choosers = pd.merge(tours, persons_merged,
                        left_on='person_id', right_index=True, how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = choosers[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    dest_col = model_settings["ALT_DEST_COL_NAME"]
    orig_col = model_settings['CHOOSER_ORIG_COL_NAME']

    # the sample already carries logsums and pick_count;
    # look up the size term of each sampled destination and store it alongside
    sampled_size_terms = reindex(destination_size_terms.size_term,
                                 destination_sample[dest_col])
    destination_sample['size_term'] = sampled_size_terms

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    model_constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # skim wrapper keyed on the chooser origin column and the alternative destination
    # column (both present after interaction merges choosers with alternatives);
    # spec @ expressions reach it under the name "skims"
    skims = skim_dict.wrap(orig_col, dest_col)

    locals_d = dict(skims=skims)
    if model_constants is not None:
        locals_d.update(model_constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    return interaction_sample_simulate(
        choosers,
        destination_sample,
        spec=segment_spec,
        choice_column=dest_col,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='destination')
def atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id):
    """
    Select a destination for each at-work subtour from the pre-sampled
    alternatives in destination_sample (annotated with mode choice logsums).

    Settings are read from atwork_subtour_destination.yaml and the spec from
    atwork_subtour_destination.csv. trace_hh_id is accepted but not used here.

    Returns
    -------
    choices
        result of interaction_sample_simulate, choosing on the column named by
        the ALT_DEST_COL_NAME setting
    """

    trace_label = 'atwork_subtour_destination_simulate'

    settings = config.read_model_settings('atwork_subtour_destination.yaml')
    spec = simulate.read_model_spec(file_name='atwork_subtour_destination.csv')

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # choosers: subtours joined to their person's attributes
    choosers = pd.merge(subtours, persons_merged,
                        left_on='person_id', right_index=True)
    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = choosers[settings['SIMULATE_CHOOSER_COLUMNS']]

    dest_col = settings["ALT_DEST_COL_NAME"]
    orig_col = 'workplace_taz'

    # the sample already carries logsums and pick_count;
    # attach the destination_size_terms columns for each sampled destination
    alternatives = pd.merge(destination_sample, destination_size_terms,
                            left_on=dest_col, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    model_constants = config.get_model_constants(settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons", len(choosers))

    # skim wrapper keyed on the chooser workplace column and the alternative
    # destination column (both present after interaction merges them);
    # spec @ expressions reach it under the name "skims"
    skims = skim_dict.wrap(orig_col, dest_col)

    locals_d = dict(skims=skims)
    if model_constants is not None:
        locals_d.update(model_constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    return interaction_sample_simulate(
        choosers,
        alternatives,
        spec=spec,
        choice_column=dest_col,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location')
Beispiel #15
0
def run_destination_simulate(spec_segment_name, tours, persons_merged,
                             destination_sample, want_logsums, model_settings,
                             network_los, destination_size_terms, estimator,
                             chunk_size, trace_label):
    """
    Pick a destination for each tour from the pre-sampled alternatives in
    destination_sample (already annotated with mode choice logsums).

    Choosers are tours merged with persons_merged, trimmed to the
    SIMULATE_CHOOSER_COLUMNS setting before the merge and sorted by index if
    needed. A size_term column is written into destination_sample in place.

    Returns
    -------
    choices
        the interaction_sample_simulate result; when want_logsums is falsy the
        Series it returns is converted to a single-column frame named 'choice'
    """

    segment_spec = simulate.spec_for_segment(model_settings,
                                             spec_id='SPEC',
                                             segment_name=spec_segment_name,
                                             estimator=estimator)

    # FIXME - MEMORY HACK - only include columns actually used in spec (omit them pre-merge)
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    wanted_person_cols = [col for col in persons_merged.columns
                          if col in chooser_columns]
    persons_merged = persons_merged[wanted_person_cols]
    # person_id is always kept on the tours side because it is the merge key
    wanted_tour_cols = [col for col in tours.columns
                        if col == 'person_id' or col in chooser_columns]
    tours = tours[wanted_tour_cols]
    choosers = pd.merge(tours, persons_merged,
                        left_on='person_id', right_index=True, how='left')

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_simulate {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    if estimator:
        estimator.write_choosers(choosers)

    dest_col = model_settings['ALT_DEST_COL_NAME']
    orig_col = model_settings['CHOOSER_ORIG_COL_NAME']

    # the sample already carries logsums and pick_count;
    # look up the size term of each sampled destination and store it alongside
    sampled_size_terms = reindex(destination_size_terms.size_term,
                                 destination_sample[dest_col])
    destination_sample['size_term'] = sampled_size_terms

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    model_constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons",
                len(choosers))

    # skim wrapper keyed on the chooser origin column and the alternative destination
    # column (both present after interaction merges choosers with alternatives);
    # spec @ expressions reach it under the name "skims"
    skims = network_los.get_default_skim_dict().wrap(orig_col, dest_col)

    locals_d = dict(skims=skims)
    if model_constants is not None:
        locals_d.update(model_constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        destination_sample,
        spec=segment_spec,
        choice_column=dest_col,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='destination',
        estimator=estimator)

    if want_logsums:
        return choices

    # for consistency, always return a dataframe with canonical column name
    assert isinstance(choices, pd.Series)
    return choices.to_frame('choice')
Beispiel #16
0
def trip_destination_simulate(primary_purpose, trips, destination_sample,
                              model_settings, want_logsums, size_term_matrix,
                              skim_hotel, estimator, chunk_size, trace_hh_id,
                              trace_label):
    """
    Choose a destination for each trip from destination_sample
    (with od_logsum and dp_logsum columns added)

    Returns
    -------
    destinations - pandas.DataFrame
        chosen destination alt in the 'choice' column; rows whose choice is
        NO_DESTINATION (failed zero-probability choices) are dropped
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_dest_simulate')
    chunk_tag = 'trip_destination.simulate'

    purpose_spec = simulate.spec_for_segment(model_settings,
                                             spec_id='DESTINATION_SPEC',
                                             segment_name=primary_purpose,
                                             estimator=estimator)

    if estimator:
        estimator.write_choosers(trips)

    dest_col = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running trip_destination_simulate with %d trips", len(trips))

    skims = skim_hotel.sample_skims(presample=False)

    # expression namespace: model constants, then size terms, then the skim wrappers
    locals_dict = config.get_model_constants(model_settings).copy()
    locals_dict['size_terms'] = size_term_matrix
    locals_dict.update(skims)

    log_alt_losers = config.setting('log_alt_losers', False)

    destinations = interaction_sample_simulate(
        choosers=trips,
        alternatives=destination_sample,
        spec=purpose_spec,
        choice_column=dest_col,
        log_alt_losers=log_alt_losers,
        want_logsums=want_logsums,
        allow_zero_probs=True,
        zero_prob_choice_val=NO_DESTINATION,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label,
        trace_choice_name='trip_dest',
        estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(destinations, pd.Series)
        destinations = destinations.to_frame('choice')

    if estimator:
        # record the model's own choices before any failed choices are suppressed,
        # then replace them with the survey values and record those as well
        estimator.write_choices(destinations.choice)

        survey_choices = estimator.get_survey_values(
            destinations.choice, 'trips', 'destination')
        destinations.choice = survey_choices
        estimator.write_override_choices(destinations.choice)

    # drop any failed zero_prob destinations
    if (destinations.choice == NO_DESTINATION).any():
        has_destination = destinations.choice != NO_DESTINATION
        destinations = destinations[has_destination]

    return destinations
Beispiel #17
0
def _schedule_tours(
        tours, persons_merged, alts, spec, constants, timetable,
        previous_tour, window_id_col, tour_trace_label):
    """
    previous_tour stores values used to add columns that can be used in the spec
    which have to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule with unique window_id_col (person_id or parent_tour_id)
    persons_merged : DataFrame
        DataFrame of persons to be merged with tours containing attributes referenced
        by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent all possible time slots.
        tdd_interaction_dataset function will use timetable to filter them to omit
        unavailable alternatives
    spec : DataFrame
        The spec which will be passed to interaction_sample_simulate.
    constants : dict
        dict of model-specific constants for eval (may be None)
    timetable : TimeTable
        timetable of time windows for person (or subtour) with rows for tours[window_id_col];
        updated with the chosen alternatives via timetable.assign
    previous_tour: Series
        series with value of tdd_alt choice for last previous tour scheduled;
        updated in place at the tours[window_id_col] labels
    window_id_col : str
        column name from tours that identifies 'owner' of this tour
        (person_id for non/mandatory tours or parent_tour_id for subtours)
    tour_trace_label
        label used for logging, tracing and chunk size reporting

    Returns
    -------
    choices
        the tdd choices returned by interaction_sample_simulate for these tours

    Raises
    ------
    AssertionError
        if tours[window_id_col] contains duplicate values
    """

    logger.info("%s schedule_tours running %d tour choices", tour_trace_label, len(tours))

    # timetable can't handle multiple tours per window_id
    # keep=False flags every member of a duplicated group so all offending rows get reported
    duplicated_window_ids = tours[window_id_col].duplicated(keep=False)
    has_duplicates = duplicated_window_ids.any()
    if has_duplicates:
        logger.warning("%s tours.%s not unique\n%s",
                       tour_trace_label, window_id_col, tours[duplicated_window_ids])
    assert not has_duplicates

    # merge persons into tours
    tours = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)

    # merge previous tour columns
    tours = tours.join(
        get_previous_tour_by_tourid(tours[window_id_col], previous_tour, alts)
    )

    # build interaction dataset filtered to include only available tdd alts
    # dataframe columns start, end , duration, person_id, tdd
    # indexed (not unique) on tour_id
    choice_column = 'tdd'
    alt_tdd = tdd_interaction_dataset(tours, alts, timetable, choice_column, window_id_col)

    # the timetable is exposed to spec expressions as "tt"
    locals_d = {
        'tt': timetable
    }
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample_simulate(
        tours,
        alt_tdd,
        spec,
        choice_column=choice_column,
        locals_d=locals_d,
        chunk_size=0,
        trace_label=tour_trace_label
    )

    # remember the latest choice per owner so the next pass can build *_previous columns
    previous_tour.loc[tours[window_id_col]] = choices.values

    # record the chosen alternatives in the owners' timetable windows
    timetable.assign(tours[window_id_col], choices)

    cum_size = chunk.log_df_size(tour_trace_label, "tours", tours, cum_size=None)
    cum_size = chunk.log_df_size(tour_trace_label, "alt_tdd", alt_tdd, cum_size)
    chunk.log_chunk_size(tour_trace_label, cum_size)

    return choices
def _schedule_tours(
        tours, persons_merged, alts,
        spec, logsum_tour_purpose,
        model_settings,
        timetable, window_id_col,
        previous_tour, tour_owner_id_col,
        tour_trace_label):
    """
    Schedule one chunk of tours: choose a tdd alternative for each tour, then
    record the choices in previous_tour and in the timetable.

    previous_tour holds the most recent tdd choice per tour owner. It is used to
    add "_previous"-suffixed copies of the alternatives columns to the choosers
    (e.g. start_previous and end_previous when alts has start and end columns).
    On the first pass those columns are undefined, so spec expressions should
    guard them with tour_num >= 2.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule with unique timetable window_id_col
    persons_merged : DataFrame
        persons to be merged with tours, containing attributes referenced
        by expressions in spec
    alts : DataFrame
        alternatives representing all possible time slots;
        tdd_interaction_dataset uses timetable to filter out unavailable ones
    spec : DataFrame
        the spec passed to interaction_sample_simulate
    logsum_tour_purpose
        if truthy, compute_logsums fills the mode_choice_logsum column of the
        alternatives; otherwise that column is set to 0
    model_settings : dict
    timetable : TimeTable
        timetable of time windows with rows for tours[window_id_col]
    window_id_col : str
        column name from tours that identifies timetable owner (or None if tours index)
        - person_id for non/mandatory tours
        - parent_tour_id for subtours,
        - None (tours index) for joint_tours since every tour may have different participants)
    previous_tour: Series
        most recent tdd choice per owner; updated in place at the
        tours[tour_owner_id_col] labels
    tour_owner_id_col : str
        column name from tours that identifies 'owner' of this tour
        (person_id for non/mandatory tours, parent_tour_id for subtours,
        household_id for joint_tours)
    tour_trace_label

    Returns
    -------
    choices
        the tdd choices returned by interaction_sample_simulate
    """

    logger.info("%s schedule_tours running %d tour choices" % (tour_trace_label, len(tours)))

    # attach person attributes to the tours;
    # columns present on both sides (e.g. household_id) keep the tours name unsuffixed
    tours = pd.merge(tours, persons_merged,
                     left_on='person_id', right_index=True,
                     suffixes=('', '_y'))
    chunk.log_df(tour_trace_label, "tours", tours)

    # when the timetable owner is the tours index, materialize it as a regular column
    # (not strictly necessary, but it keeps the code below uniform)
    if window_id_col is None:
        window_id_col = tours.index.name
        tours[window_id_col] = tours.index

    # timetable can't handle multiple tours per window_id
    assert not tours[window_id_col].duplicated().any()

    # - interaction dataset restricted to the tdd alts that are still available
    # dataframe columns start, end , duration, person_id, tdd
    # indexed (not unique) on tour_id
    choice_column = 'tdd'
    alt_tdd = tdd_interaction_dataset(
        tours, alts, timetable, choice_column, window_id_col, tour_trace_label)
    chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    # - add logsums (constant 0 when no logsum purpose was requested)
    alt_tdd['mode_choice_logsum'] = (
        compute_logsums(alt_tdd, tours, logsum_tour_purpose, model_settings, tour_trace_label)
        if logsum_tour_purpose else 0
    )

    # - merge in previous tour columns
    # adds start_previous and end_previous, joins on index
    tours = tours.join(
        get_previous_tour_by_tourid(tours[tour_owner_id_col], previous_tour, alts))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - make choices; the timetable is exposed to spec expressions as "tt"
    locals_d = dict(tt=timetable)
    model_constants = config.get_model_constants(model_settings)
    if model_constants is not None:
        locals_d.update(model_constants)

    choices = interaction_sample_simulate(
        tours,
        alt_tdd,
        spec,
        choice_column=choice_column,
        locals_d=locals_d,
        chunk_size=0,
        trace_label=tour_trace_label)

    # - write the results back into the caller's previous_tour and timetable

    # latest tdd choice per tour owner
    previous_tour.loc[tours[tour_owner_id_col]] = choices.values

    # chosen tdd footprints per timetable window
    timetable.assign(tours[window_id_col], choices)

    return choices
Beispiel #19
0
def run_destination_simulate(spec_segment_name, tours, persons_merged,
                             destination_sample, want_logsums, model_settings,
                             skim_dict, destination_size_terms, estimator,
                             chunk_size, trace_label):
    """
    Pick a destination for each tour from the pre-sampled alternatives in
    destination_sample (already annotated with mode choice logsums).

    Choosers are tours merged with persons_merged and trimmed to the
    SIMULATE_CHOOSER_COLUMNS setting. A size_term column is written into
    destination_sample in place before simulation.

    Returns
    -------
    choices
        result of interaction_sample_simulate, returned as is
    """

    segment_spec = simulate.spec_for_segment(model_settings,
                                             spec_id='SPEC',
                                             segment_name=spec_segment_name,
                                             estimator=estimator)

    # choosers are tours with their person's attributes attached
    choosers = pd.merge(tours, persons_merged,
                        left_on='person_id', right_index=True, how='left')
    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = choosers[model_settings['SIMULATE_CHOOSER_COLUMNS']]
    if estimator:
        estimator.write_choosers(choosers)

    dest_col = model_settings["ALT_DEST_COL_NAME"]
    orig_col = model_settings['CHOOSER_ORIG_COL_NAME']

    # the sample already carries logsums and pick_count;
    # look up the size term of each sampled destination and store it alongside
    sampled_size_terms = reindex(destination_size_terms.size_term,
                                 destination_sample[dest_col])
    destination_sample['size_term'] = sampled_size_terms

    model_constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons",
                len(choosers))

    # skim wrapper keyed on the chooser origin column and the alternative destination
    # column (both present after interaction merges choosers with alternatives);
    # spec @ expressions reach it under the name "skims"
    skims = skim_dict.wrap(orig_col, dest_col)

    locals_d = dict(skims=skims)
    if model_constants is not None:
        locals_d.update(model_constants)

    return interaction_sample_simulate(
        choosers,
        destination_sample,
        spec=segment_spec,
        choice_column=dest_col,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='destination',
        estimator=estimator)
Beispiel #20
0
def school_location_simulate(persons_merged, school_location_sample,
                             school_location_spec, school_location_settings,
                             skim_dict, destination_size_terms, chunk_size,
                             trace_hh_id):
    """
    School location model: for each school type segment, select a school_taz
    from the school_location_sample alternatives (annotated with mode_choice
    logsum), then add the result to the persons table as the school_taz column.

    Persons with no choice in any segment get school_taz = -1.
    """

    choosers = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()

    trace_label = 'school_location_simulate'
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    model_constants = config.get_model_constants(school_location_settings)

    # skim wrapper keyed on the chooser TAZ column and the alternative TAZ column
    # (both present after interaction merges choosers with alternatives);
    # spec @ expressions reach it under the name "skims"
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = dict(skims=skims)
    if model_constants is not None:
        locals_d.update(model_constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = choosers[school_location_settings['SIMULATE_CHOOSER_COLUMNS']]
    tracing.dump_df(DUMP, choosers, 'school_location_simulate', 'choosers')

    segment_choices = []
    for school_type in ('university', 'highschool', 'gradeschool'):

        # expose the current segment name to spec expressions
        locals_d['segment'] = school_type

        # persons flagged by the is_<school_type> column, and the alts sampled for that type
        in_segment = choosers["is_" + school_type]
        choosers_segment = choosers[in_segment]
        alts_segment = school_location_sample[
            school_location_sample['school_type'] == school_type]

        # the sample already carries logsums and pick_count;
        # attach the destination_size_terms columns for each sampled alternative
        alts_segment = pd.merge(alts_segment, destination_size_terms,
                                left_on=alt_col_name, right_index=True, how="left")

        tracing.dump_df(DUMP, alts_segment, trace_label,
                        '%s_alternatives' % school_type)

        segment_choices.append(
            interaction_sample_simulate(
                choosers_segment,
                alts_segment,
                spec=school_location_spec[[school_type]],
                choice_column=alt_col_name,
                skims=skims,
                locals_d=locals_d,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(trace_label, school_type),
                trace_choice_name='school_location'))

    choices = pd.concat(segment_choices)

    # only persons who go to school received a choice;
    # align to all persons and code the gaps as -1 (no school location)
    choices = choices.reindex(persons_merged.index)
    choices = choices.fillna(-1).astype(int)

    tracing.dump_df(DUMP, choices, trace_label, 'choices')

    tracing.print_summary('school_taz', choices, describe=True)

    inject.add_column("persons", "school_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        dependent_columns = inject.get_table('persons_school').columns
        trace_columns = ['school_taz'] + dependent_columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
Beispiel #21
0
def _schedule_tours(tours, persons_merged, alts, spec, logsum_tour_purpose,
                    model_settings, timetable, window_id_col, previous_tour,
                    tour_owner_id_col, tour_trace_label):
    """
    Pick a tdd (tour departure/duration) alternative for every tour in a chunk.

    Steps, in order: merge person attributes into the tours, build the
    interaction dataset of tdd alternatives via tdd_interaction_dataset, attach
    a 'mode_choice_logsum' column to those alternatives, join the previous-tour
    columns onto the tours, run interaction_sample_simulate, and finally record
    the choices in previous_tour and timetable.

    previous_tour supplies the values behind the "_previous" columns that the
    spec may reference: every column of the alternatives table is made
    available with the suffix "_previous" (e.g. start_previous, end_previous),
    holding the value from the most recent tour of the same owner.  On the
    first pass those columns are undefined, so spec expressions should guard
    them with tour_num >= 2.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule; values of window_id_col must be unique
        (asserted below).  Merged with persons_merged on 'person_id'.
    persons_merged : DataFrame
        person attributes referenced by spec expressions; joined to tours on
        its index
    alts : DataFrame
        all candidate time slots; tdd_interaction_dataset uses timetable to
        omit unavailable ones
    spec : DataFrame
        spec handed to interaction_sample_simulate
    logsum_tour_purpose : str or falsy
        when truthy, passed to compute_logsums to fill 'mode_choice_logsum';
        otherwise that column is set to 0
    model_settings : dict
        passed to compute_logsums and config.get_model_constants
    timetable : TimeTable
        time windows for the timetable owner, with rows for
        tours[window_id_col]; exposed to the spec as 'tt' and updated in place
        with the chosen tdd footprints
    window_id_col : str or None
        tours column identifying the timetable owner; None means the tours
        index is the owner id
        - person_id for non/mandatory tours
        - parent_tour_id for subtours
        - None (tours index) for joint_tours, since each tour may have
          different participants
    previous_tour : Series
        most recent tdd choice per tour owner; updated in place
    tour_owner_id_col : str
        tours column identifying the 'owner' of the tour (person_id for
        non/mandatory tours, parent_tour_id for subtours, household_id for
        joint_tours)
    tour_trace_label : str
        label used for logging, chunk bookkeeping and tracing

    Returns
    -------
    choices
        whatever interaction_sample_simulate returns for choice column 'tdd'
        (presumably a Series of chosen tdd alternatives indexed like the
        tours -- confirm against interaction_sample_simulate)
    """

    logger.info("%s schedule_tours running %d tour choices" %
                (tour_trace_label, len(tours)))

    # attach person attributes; the empty left suffix keeps the tours' own
    # names for columns present in both tables (e.g. household_id)
    tour_rows = pd.merge(
        tours, persons_merged,
        left_on='person_id', right_index=True,
        suffixes=('', '_y'),
    )
    chunk.log_df(tour_trace_label, "tours", tour_rows)

    # when the timetable owner is the tours index, materialize it as a regular
    # column so the rest of the code can always address it by name
    window_col = window_id_col
    if window_col is None:
        window_col = tour_rows.index.name
        tour_rows[window_col] = tour_rows.index

    # timetable can't handle multiple tours per window_id
    assert not tour_rows[window_col].duplicated().any()

    # interaction dataset restricted to the available tdd alternatives
    # (columns start, end, duration, person_id, tdd; non-unique tour_id index)
    tdd_col = 'tdd'
    tdd_alts = tdd_interaction_dataset(
        tour_rows, alts, timetable, tdd_col, window_col, tour_trace_label)
    chunk.log_df(tour_trace_label, "alt_tdd", tdd_alts)

    # logsums are only computed when a tour purpose was supplied
    tdd_alts['mode_choice_logsum'] = (
        compute_logsums(tdd_alts, tour_rows, logsum_tour_purpose,
                        model_settings, tour_trace_label)
        if logsum_tour_purpose else 0
    )

    # bring in the *_previous columns (joined on the index)
    previous_cols = get_previous_tour_by_tourid(
        tour_rows[tour_owner_id_col], previous_tour, alts)
    tour_rows = tour_rows.join(previous_cols)
    chunk.log_df(tour_trace_label, "tours", tour_rows)

    # expression namespace: the timetable first, then any model constants
    locals_d = dict(tt=timetable)
    model_constants = config.get_model_constants(model_settings)
    if model_constants is not None:
        locals_d.update(model_constants)

    choices = interaction_sample_simulate(
        tour_rows,
        tdd_alts,
        spec,
        choice_column=tdd_col,
        locals_d=locals_d,
        chunk_size=0,
        trace_label=tour_trace_label,
    )

    # remember each owner's latest tdd choice for the next round ...
    previous_tour.loc[tour_rows[tour_owner_id_col]] = choices.values

    # ... and block out the chosen tdd footprints in the timetable
    timetable.assign(tour_rows[window_col], choices)

    return choices