Beispiel #1
0
def trip_scheduling_choice(trips, tours, skim_dict, chunk_size, trace_hh_id):

    trace_label = 'trip_scheduling_choice'
    model_settings = config.read_model_settings('trip_scheduling_choice.yaml')
    spec = get_spec_for_segment(model_settings, 'SPECIFICATION', 'stage_one')

    trips_df = trips.to_frame()
    tours_df = tours.to_frame()

    outbound_trips = trips_df[trips_df[OUTBOUND_FLAG]]
    inbound_trips = trips_df[~trips_df[OUTBOUND_FLAG]]

    last_outbound_trip = trips_df.loc[outbound_trips.groupby('tour_id')
                                      ['trip_num'].idxmax()]
    first_inbound_trip = trips_df.loc[inbound_trips.groupby('tour_id')
                                      ['trip_num'].idxmin()]

    tours_df[NUM_OB_STOPS] = outbound_trips.groupby('tour_id').size().reindex(
        tours.index) - 1
    tours_df[NUM_IB_STOPS] = inbound_trips.groupby('tour_id').size().reindex(
        tours.index) - 1
    tours_df[LAST_OB_STOP] = last_outbound_trip[[
        'tour_id', 'origin'
    ]].set_index('tour_id').reindex(tours.index)
    tours_df[FIRST_IB_STOP] = first_inbound_trip[[
        'tour_id', 'destination'
    ]].set_index('tour_id').reindex(tours.index)

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    if preprocessor_settings:
        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        do_skim_stack_wrapper = skim_dict.wrap('destination', 'origin')
        obib_skim_stack_wrapper = skim_dict.wrap(LAST_OB_STOP, FIRST_IB_STOP)

        skims = [
            od_skim_stack_wrapper, do_skim_stack_wrapper,
            obib_skim_stack_wrapper
        ]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            "do_skims": do_skim_stack_wrapper,
            "obib_skims": obib_skim_stack_wrapper
        }

        simulate.set_skim_wrapper_targets(tours_df, skims)

        expressions.assign_columns(df=tours_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    tours_df = run_trip_scheduling_choice(spec, tours_df, skims, locals_dict,
                                          chunk_size, trace_hh_id, trace_label)

    pipeline.replace_table("tours", tours_df)
Beispiel #2
0
def run_alts_preprocessor(model_settings, alts, segment, locals_dict,
                          trace_label):
    """
    run preprocessor on alts, as specified by ALTS_PREPROCESSOR in model_settings

    we are agnostic on whether alts are merged or not

    Parameters
    ----------
    model_settings: dict
        yaml model settings file as dict
    alts: pandas.DataFrame
        tdd_alts or tdd_alts merged wiht choosers (we are agnostic)
    segment: string
        segment selector as understood by caller (e.g. logsum_tour_purpose)
    locals_dict: dict
        we let caller worry about what needs to be in it. though actually depends on modelers needs
    trace_label: string

    Returns
    -------
    alts: pandas.DataFrame
        annotated copy of alts
    """

    preprocessor_settings = model_settings.get('ALTS_PREPROCESSOR', {})

    if segment in preprocessor_settings:
        # segmented by logsum_tour_purpose
        preprocessor_settings = preprocessor_settings.get(segment)
        logger.debug(
            f"running ALTS_PREPROCESSOR with spec for {segment}: {preprocessor_settings.get('SPEC')}"
        )
    elif 'SPEC' in preprocessor_settings:
        # unsegmented (either because no segmentation, or fallback if settings has generic preprocessor)
        logger.debug(
            f"running ALTS_PREPROCESSOR with unsegmented spec {preprocessor_settings.get('SPEC')}"
        )
    else:
        logger.debug(
            f"skipping alts preprocesser because no ALTS_PREPROCESSOR segment for {segment}"
        )
        preprocessor_settings = None

    if preprocessor_settings:

        logger.debug(
            f"run_alts_preprocessor calling assign_columns for {segment} preprocessor_settings"
        )
        alts = alts.copy()

        expressions.assign_columns(df=alts,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    return alts
def annotate_jtp(model_settings, trace_label):

    # - annotate persons
    persons = inject.get_table('persons').to_frame()
    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))
    pipeline.replace_table("persons", persons)
Beispiel #4
0
def annotate_tables(model_settings, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'annotate_tables')

    chunk.log_rss(trace_label)

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning(
            f"{trace_label} - annotate_tables setting is empty - nothing to do!"
        )

    assert isinstance(annotate_tables, list), \
        f"annotate_tables settings should be a list but is {type(annotate_tables)}"

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']

        chunk.log_rss(f"{trace_label}.pre-get_table.{tablename}")

        df = inject.get_table(tablename).to_frame()
        chunk.log_df(trace_label, tablename, df)

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(
                f"Setting 'column_map' has been changed to 'rename_columns'. "
                f"Support for 'column_map' in annotate_tables  will be removed in future versions.",
                FutureWarning)

            logger.info(
                f"{trace_label} - renaming {tablename} columns {column_map}")
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info(
                f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}"
            )
            expressions.assign_columns(df=df,
                                       model_settings=annotate,
                                       trace_label=trace_label)

        chunk.log_df(trace_label, tablename, df)

        # - write table to pipeline
        pipeline.replace_table(tablename, df)

        del df
        chunk.log_df(trace_label, tablename, None)
Beispiel #5
0
def trip_departure_choice(
        trips,
        trips_merged,
        skim_dict,
        chunk_size,
        trace_hh_id):

    trace_label = 'trip_departure_choice'
    model_settings = config.read_model_settings('trip_departure_choice.yaml')

    spec = simulate.read_model_spec(file_name=model_settings['SPECIFICATION'])

    trips_merged_df = trips_merged.to_frame()
    # add tour-based chunk_id so we can chunk all trips in tour together
    tour_ids = trips_merged[TOUR_ID].unique()
    trips_merged_df['chunk_id'] = reindex(pd.Series(list(range(len(tour_ids))), tour_ids), trips_merged_df.tour_id)

    max_tour_id = trips_merged[TOUR_ID].max()

    trip_departure_choice.MAX_TOUR_ID = int(np.power(10, np.ceil(np.log10(max_tour_id))))
    locals_d = config.get_model_constants(model_settings).copy()

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)
    tour_legs = get_tour_legs(trips_merged_df)
    pipeline.get_rn_generator().add_channel('tour_legs', tour_legs)

    if preprocessor_settings:
        od_skim = skim_dict.wrap('origin', 'destination')
        do_skim = skim_dict.wrap('destination', 'origin')

        skims = [od_skim, do_skim]

        simulate.set_skim_wrapper_targets(trips_merged_df, skims)

        locals_d.update({
            "od_skims": od_skim,
            "do_skims": do_skim,
        })

        expressions.assign_columns(
            df=trips_merged_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    choices = apply_stage_two_model(spec, trips_merged_df, chunk_size, trace_label)

    trips_df = trips.to_frame()
    trip_length = len(trips_df)
    trips_df = pd.concat([trips_df, choices], axis=1)
    assert len(trips_df) == trip_length
    assert trips_df[trips_df['depart'].isnull()].empty

    pipeline.replace_table("trips", trips_df)
def initialize_tours(network_los, households, persons, trace_hh_id):

    trace_label = 'initialize_tours'

    tours = read_input_table("tours")

    # FIXME can't use households_sliced injectable as flag like persons table does in case of resume_after.
    # FIXME could just always slice...
    slice_happened = \
        inject.get_injectable('households_sample_size', 0) > 0 \
        or inject.get_injectable('households_sample_size', 0) > 0
    if slice_happened:
        logger.info("slicing tours %s" % (tours.shape,))
        # keep all persons in the sampled households
        tours = tours[tours.person_id.isin(persons.index)]

    # annotate before patching tour_id to allow addition of REQUIRED_TOUR_COLUMNS defined above
    model_settings = config.read_model_settings('initialize_tours.yaml', mandatory=True)
    expressions.assign_columns(
        df=tours,
        model_settings=model_settings.get('annotate_tours'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_tours'))

    skip_patch_tour_ids = model_settings.get('skip_patch_tour_ids', False)
    if skip_patch_tour_ids:
        pass
    else:
        tours = patch_tour_ids(tours)
    assert tours.index.name == 'tour_id'

    # replace table function with dataframe
    inject.add_table('tours', tours)

    pipeline.get_rn_generator().add_channel('tours', tours)

    tracing.register_traceable_table('tours', tours)

    logger.debug(f"{len(tours.household_id.unique())} unique household_ids in tours")
    logger.debug(f"{len(households.index.unique())} unique household_ids in households")
    assert not tours.index.duplicated().any()

    tours_without_persons = ~tours.person_id.isin(persons.index)
    if tours_without_persons.any():
        logger.error(f"{tours_without_persons.sum()} tours out of {len(persons)} without persons\n"
                     f"{pd.Series({'person_id': tours_without_persons.index.values})}")
        raise RuntimeError(f"{tours_without_persons.sum()} tours with bad person_id")

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='initialize_tours',
                         warn_if_empty=True)
Beispiel #7
0
def compute_utilities(network_los,
                      model_settings,
                      choosers,
                      model_constants,
                      trace_label,
                      trace=False,
                      trace_column_names=None):
    """
    Compute utilities
    """
    with chunk.chunk_log(f'tvpb compute_utilities'):
        trace_label = tracing.extend_trace_label(trace_label, 'compute_utils')

        logger.debug(
            f"{trace_label} Running compute_utilities with {choosers.shape[0]} choosers"
        )

        locals_dict = {'np': np, 'los': network_los}
        locals_dict.update(model_constants)

        # we don't grok coefficients, but allow them to use constants in spec alt columns
        spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
        for c in spec.columns:
            if c != simulate.SPEC_LABEL_NAME:
                spec[c] = spec[c].map(
                    lambda s: model_constants.get(s, s)).astype(float)

        with chunk.chunk_log(f'compute_utilities'):

            # - run preprocessor to annotate choosers
            preprocessor_settings = model_settings.get('PREPROCESSOR')
            if preprocessor_settings:

                # don't want to alter caller's dataframe
                choosers = choosers.copy()

                expressions.assign_columns(
                    df=choosers,
                    model_settings=preprocessor_settings,
                    locals_dict=locals_dict,
                    trace_label=trace_label)

            utilities = simulate.eval_utilities(
                spec,
                choosers,
                locals_d=locals_dict,
                trace_all_rows=trace,
                trace_label=trace_label,
                trace_column_names=trace_column_names)

    return utilities
Beispiel #8
0
def annotate_tables(model_settings, trace_label):

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning(f"{trace_label} - annotate_tables setting is empty - nothing to do!")

    assert isinstance(annotate_tables, list), \
        f"annotate_tables settings should be a list but is {type(annotate_tables)}"

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']

        df = inject.get_table(tablename).to_frame()

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(f"{trace_label} - annotate_tables option 'column_map' renamed 'rename_columns' "
                          f"and moved to global settings file. Support for 'column_map' in annotate_tables "
                          f"will be removed in future versions.",
                          FutureWarning)

            logger.info(f"{trace_label} - renaming {tablename} columns {column_map}")
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info(f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}")
            expressions.assign_columns(
                df=df,
                model_settings=annotate,
                trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, df)
Beispiel #9
0
def add_null_results(trace_label, mandatory_tour_frequency_settings):
    logger.info("Skipping %s: add_null_results", trace_label)

    persons = inject.get_table('persons').to_frame()
    persons['mandatory_tour_frequency'] = ''

    tours = pd.DataFrame()
    tours['tour_category'] = None
    tours['tour_type'] = None
    tours['person_id'] = None
    tours.index.name = 'tour_id'
    pipeline.replace_table("tours", tours)

    expressions.assign_columns(
        df=persons,
        model_settings=mandatory_tour_frequency_settings.get(
            'annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_persons'))

    pipeline.replace_table("persons", persons)
def joint_tour_frequency(households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making fully joint trips (see the
    alternatives above).
    """
    trace_label = 'joint_tour_frequency'
    model_settings_file_name = 'joint_tour_frequency.yaml'

    estimator = estimation.manager.begin_estimation('joint_tour_frequency')

    model_settings = config.read_model_settings(model_settings_file_name)

    alternatives = simulate.read_model_alts(
        'joint_tour_frequency_alternatives.csv', set_index='alt')

    # - only interested in households with more than one cdap travel_active person and
    # - at least one non-preschooler
    households = households.to_frame()
    multi_person_households = households[
        households.participates_in_jtf_model].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info(
        "Running joint_tour_frequency with %d multi-person households" %
        multi_person_households.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=multi_person_households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(multi_person_households)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households',
                                              'joint_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be point person for the purpose of generating an index and setting origin
    temp_point_persons = persons.loc[persons.PNUM == 1]
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_zone_id']]

    joint_tours = \
        process_joint_tours(choices, alternatives, temp_point_persons)

    tours = pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
    households['joint_tour_frequency'] = choices.reindex(
        households.index).fillna(no_tours_alt).astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency',
                          households.joint_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households, label="joint_tour_frequency.households")

        tracing.trace_df(joint_tours,
                         label="joint_tour_frequency.joint_tours",
                         slicer='household_id')

    if estimator:
        survey_tours = estimation.manager.get_survey_table('tours')
        survey_tours = survey_tours[survey_tours.tour_category == 'joint']

        print(f"len(survey_tours) {len(survey_tours)}")
        print(f"len(joint_tours) {len(joint_tours)}")

        different = False
        survey_tours_not_in_tours = survey_tours[~survey_tours.index.
                                                 isin(joint_tours.index)]
        if len(survey_tours_not_in_tours) > 0:
            print(f"survey_tours_not_in_tours\n{survey_tours_not_in_tours}")
            different = True
        tours_not_in_survey_tours = joint_tours[~joint_tours.index.
                                                isin(survey_tours.index)]
        if len(survey_tours_not_in_tours) > 0:
            print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}")
            different = True
        assert not different
Beispiel #11
0
def run_trip_purpose(trips_df, estimator, chunk_size, trace_hh_id,
                     trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    For each intermediate stop on a tour (i.e. trip other than the last trip outbound or inbound)
    each trip is assigned a purpose based on an observed frequency distribution

    The distribution should always be segmented by tour purpose and tour direction. By default it is also
    segmented by person type. The join columns can be overwritten using the "probs_join_cols" parameter in
    the model settings. The model will attempt to segment by trip depart time as well if necessary
    and depart time ranges are specified in the probability lookup table.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    # uniform across trip_purpose
    chunk_tag = 'trip_purpose'

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    probs_join_cols = model_settings.get('probs_join_cols', PROBS_JOIN_COLUMNS)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name),
                             comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # estimator.write_coefficients(coefficients_df, model_settings)

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'work', 'home'),
                        index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(df=trips_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    use_depart_time = model_settings.get('use_depart_time', True)

    for i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(trips_df, chunk_size, chunk_tag, trace_label):
        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            estimator,
            probs_join_cols=probs_join_cols,
            use_depart_time=use_depart_time,
            trace_hh_id=trace_hh_id,
            trace_label=chunk_trace_label)

        result_list.append(choices)

        chunk.log_df(trace_label, f'result_list', result_list)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings_file_name = 'non_mandatory_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours",
                len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged =\
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=non_mandatory_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation(
        'non_mandatory_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    # - non_mandatory tour scheduling is not segmented by tour type
    spec_info = {'spec': model_spec, 'estimator': estimator}

    choices = vectorize_tour_scheduling(non_mandatory_tours,
                                        persons_merged,
                                        tdd_alts,
                                        timetable,
                                        tour_segments=spec_info,
                                        tour_segment_col=None,
                                        model_settings=model_settings,
                                        chunk_size=chunk_size,
                                        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in non_mandatory_tours.groupby('tour_num',
                                                               sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
Beispiel #13
0
def run_tour_od(tours, persons, want_logsums, want_sample_table,
                model_settings, network_los, estimator, chunk_size,
                trace_hh_id, trace_label):

    size_term_calculator = SizeTermCalculator(
        model_settings['SIZE_TERM_SELECTOR'])
    preprocessor_settings = model_settings.get('preprocessor', None)
    origin_col_name = model_settings['ORIG_COL_NAME']

    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    # maps segment names to compact (integer) ids
    segments = model_settings['SEGMENTS']

    # interaction_sample_simulate insists choosers appear in same order as alts
    tours = tours.sort_index()

    choices_list = []
    sample_list = []
    for segment_name in segments:

        choosers = tours[tours[chooser_segment_column] == segment_name]

        choosers = pd.merge(
            choosers,
            persons.to_frame(columns=['is_university', 'demographic_segment']),
            left_on='person_id',
            right_index=True)

        # - annotate choosers
        if preprocessor_settings:
            expressions.assign_columns(df=choosers,
                                       model_settings=preprocessor_settings,
                                       trace_label=trace_label)

        # size_term segment is segment_name
        segment_destination_size_terms = size_term_calculator.dest_size_terms_df(
            segment_name, trace_label)

        if choosers.shape[0] == 0:
            logger.info("%s skipping segment %s: no choosers", trace_label,
                        segment_name)
            continue

        # - od_sample
        spec_segment_name = segment_name  # spec_segment_name is segment_name

        od_sample_df = \
            run_od_sample(
                spec_segment_name,
                choosers,
                model_settings,
                network_los,
                segment_destination_size_terms,
                estimator,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(
                    trace_label, 'sample.%s' % segment_name))

        if model_settings['ORIG_FILTER'] == 'original_MAZ > 0':
            pass
        elif model_settings['ORIG_FILTER'] == 'external_TAZ > 0':
            # sampled alts using internal mazs, so now we
            # have to convert to using the external tazs
            od_sample_df[origin_col_name] = map_maz_to_ext_maz(
                od_sample_df[origin_col_name])
        else:
            raise ValueError(
                'Not sure how you identified tour origins but you probably need '
                'to choose a different ORIG_FILTER setting')

        # - destination_logsums
        od_sample_df = \
            run_od_logsums(
                spec_segment_name,
                choosers,
                od_sample_df,
                model_settings,
                network_los,
                estimator,
                chunk_size=chunk_size,
                trace_hh_id=trace_hh_id,
                trace_label=tracing.extend_trace_label(trace_label, 'logsums.%s' % segment_name))

        # - od_simulate
        choices = \
            run_od_simulate(
                spec_segment_name,
                choosers,
                od_sample_df,
                want_logsums=want_logsums,
                model_settings=model_settings,
                network_los=network_los,
                destination_size_terms=segment_destination_size_terms,
                estimator=estimator,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(trace_label, 'simulate.%s' % segment_name))

        choices_list.append(choices)
        if estimator:
            assert estimator.want_unsampled_alternatives

        if want_sample_table:
            # FIXME - sample_table
            od_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'],
                                   append=True,
                                   inplace=True)
            sample_list.append(od_sample_df)

        # FIXME - want to do this here?
        del od_sample_df

    if len(choices_list) > 0:
        choices_df = pd.concat(choices_list)

    if len(sample_list) > 0:
        save_sample_df = pd.concat(sample_list)
    else:
        # this could happen either with small samples as above, or if no saved sample desired
        save_sample_df = None

    return choices_df, save_sample_df
Beispiel #14
0
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     network_los, skims, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info(
        f"{trace_label} compute_logsums for {choosers.shape[0]} choosers {alt_tdd.shape[0]} alts"
    )

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)
    locals_dict = {}
    locals_dict.update(constants)

    if network_los.zone_system == los.THREE_ZONE:
        # TVPB constants can appear in expressions
        locals_dict.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    locals_dict.update(skims)

    # constrained coefficients can appear in expressions
    coefficients = simulate.get_segment_coefficients(logsum_settings,
                                                     tour_purpose)
    locals_dict.update(coefficients)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # - compute logsums
    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec,
                                             coefficients,
                                             estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                trace_label)

    logsums = simulate.simple_simulate_logsums(choosers,
                                               logsum_spec,
                                               nest_spec,
                                               skims=skims,
                                               locals_d=locals_dict,
                                               chunk_size=0,
                                               trace_label=trace_label)

    return logsums
def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtour tours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'
    model_settings_file_name = 'atwork_subtour_frequency.yaml'

    tours = tours.to_frame()
    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    alternatives = simulate.read_model_alts(
        'atwork_subtour_frequency_alternatives.csv', set_index='alt')

    # merge persons into work_tours
    persons_merged = persons_merged.to_frame()
    work_tours = pd.merge(work_tours,
                          persons_merged,
                          left_on='person_id',
                          right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours",
                len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        expressions.assign_columns(df=work_tours,
                                   model_settings=preprocessor_settings,
                                   trace_label=trace_label)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(work_tours)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours',
                                              'atwork_subtour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    tours = pipeline.extend_table("tours", subtours)

    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    tracing.print_summary('atwork_subtour_frequency',
                          tours.atwork_subtour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_frequency.tours')
def telecommute_frequency(
        persons_merged, persons,
        chunk_size, trace_hh_id):
    """
    This model predicts the frequency of telecommute for a person (worker) who
    does not works from home. The alternatives of this model are 'No Telecommute',
    '1 day per week', '2 to 3 days per week' and '4 days per week'. This model
    reflects the choices of people who prefer a combination of working from home and
    office during a week.
    """

    trace_label = 'telecommute_frequency'
    model_settings_file_name = 'telecommute_frequency.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('telecommute_frequency')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='telecommute_frequency',
        estimator=estimator)

    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'telecommute_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['telecommute_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('telecommute_frequency', persons.telecommute_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
Beispiel #17
0
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'
    model_settings_file_name = 'mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons",
                len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    estimator = estimation.manager.begin_estimation('mandatory_tour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'mandatory_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as got non_mandatory_tours except trip types are "work" and "school"
    """
    alternatives = simulate.read_model_alts(
        'mandatory_tour_frequency_alternatives.csv', set_index='alt')
    choosers['mandatory_tour_frequency'] = choices.reindex(choosers.index)

    mandatory_tours = process_mandatory_tours(
        persons=choosers, mandatory_tour_frequency_alts=alternatives)

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(
        persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label,
                                               'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
Beispiel #18
0
def non_mandatory_tour_frequency(persons, persons_merged, chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings_file_name = 'non_mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives = simulate.read_model_alts(
        'non_mandatory_tour_frequency_alternatives.csv', set_index=None)
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # filter based on results of CDAP
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {'person_max_window': person_max_window}

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    logger.info("Running non_mandatory_tour_frequency with %d persons",
                len(choosers))

    constants = config.get_model_constants(model_settings)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    spec_segments = model_settings.get('SPEC_SEGMENTS', {})

    # segment by person type and pick the right spec for each person type
    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings['NAME']
        ptype = segment_settings['PTYPE']

        # pick the spec column for the segment
        segment_spec = model_spec[[segment_name]]

        chooser_segment = choosers[choosers.ptype == ptype]

        logger.info("Running segment '%s' of size %d", segment_name,
                    len(chooser_segment))

        if len(chooser_segment) == 0:
            # skip empty segments
            continue

        estimator = \
            estimation.manager.begin_estimation(model_name=segment_name, bundle_name='non_mandatory_tour_frequency')

        coefficients_df = simulate.read_model_coefficients(segment_settings)
        segment_spec = simulate.eval_coefficients(segment_spec,
                                                  coefficients_df, estimator)

        if estimator:
            estimator.write_spec(model_settings, bundle_directory=True)
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name,
                                           bundle_directory=True)
            # preserving coefficients file name makes bringing back updated coefficients more straightforward
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)
            estimator.write_alternatives(alternatives, bundle_directory=True)

            # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
            #  shuold we do it here or have interaction_simulate do it?
            # chooser index must be duplicated in column or it will be omitted from interaction_dataset
            # estimation requires that chooser_id is either in index or a column of interaction_dataset
            # so it can be reformatted (melted) and indexed by chooser_id and alt_id
            assert chooser_segment.index.name == 'person_id'
            assert 'person_id' not in chooser_segment.columns
            chooser_segment['person_id'] = chooser_segment.index

            # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables?
            estimator.set_alt_id('alt_id')

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = interaction_simulate(
            chooser_segment,
            alternatives,
            spec=segment_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % segment_name,
            trace_choice_name='non_mandatory_tour_frequency',
            estimator=estimator)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'persons', 'non_mandatory_tour_frequency')
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

        # FIXME - force garbage collection?
        force_garbage_collect()

    del alternatives['tot_tours']  # del tot_tours column we added above

    # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    # is the index value of the chosen alternative in the alternatives table.
    choices = pd.concat(choices_list).sort_index()

    # add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)
    """
    We have now generated non-mandatory tour frequencies, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)

    But before we do that, we run an additional probablilistic step to extend/increase tour counts
    beyond the strict limits of the tour_frequency alternatives chosen above (which are currently limited
    to at most 2 escort tours and 1 each of shopping, othmaint, othdiscr, eatout, and social tours)

    The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate is simply the
    index value of the chosen alternative in the alternatives table.

    get counts of each of the tour type alternatives (so we can extend)
               escort  shopping  othmaint  othdiscr    eatout    social
    parent_id
    2588676         2         0         0         1         1         0
    2588677         0         1         0         1         0         0
    """

    # counts of each of the tour type alternatives (so we can extend)
    modeled_tour_counts = alternatives.loc[choices]
    modeled_tour_counts.index = choices.index  # assign person ids to the index

    # - extend_tour_counts - probabalistic
    extended_tour_counts = \
        extend_tour_counts(choosers, modeled_tour_counts.copy(), alternatives,
                           trace_hh_id, tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    num_modeled_tours = modeled_tour_counts.sum().sum()
    num_extended_tours = extended_tour_counts.sum().sum()
    logger.info("extend_tour_counts increased tour count by %s from %s to %s" %
                (num_extended_tours - num_modeled_tours, num_modeled_tours,
                 num_extended_tours))
    """
    create the non_mandatory tours based on extended_tour_counts
    """
    if estimator:
        override_tour_counts = \
            estimation.manager.get_survey_values(extended_tour_counts,
                                                 table_name='persons',
                                                 column_names=['_%s' % c for c in extended_tour_counts.columns])
        override_tour_counts = \
            override_tour_counts.rename(columns={('_%s' % c): c for c in extended_tour_counts.columns})
        logger.info(
            "estimation get_survey_values override_tour_counts %s changed cells"
            % (override_tour_counts != extended_tour_counts).sum().sum())
        extended_tour_counts = override_tour_counts
    """
    create the non_mandatory tours based on extended_tour_counts
    """
    non_mandatory_tours = process_non_mandatory_tours(persons,
                                                      extended_tour_counts)
    assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()

    if estimator:

        # make sure they created the right tours
        survey_tours = estimation.manager.get_survey_table(
            'tours').sort_index()
        non_mandatory_survey_tours = survey_tours[survey_tours.tour_category ==
                                                  'non_mandatory']
        assert len(non_mandatory_survey_tours) == len(non_mandatory_tours)
        assert non_mandatory_survey_tours.index.equals(
            non_mandatory_tours.sort_index().index)

        # make sure they created tours with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_type', 'tour_category']
        survey_tours = \
            estimation.manager.get_survey_values(non_mandatory_tours,
                                                 table_name='tours',
                                                 column_names=columns)

        tours_differ = (non_mandatory_tours[columns] !=
                        survey_tours[columns]).any(axis=1)

        if tours_differ.any():
            print("tours_differ\n%s" % tours_differ)
            print("%s of %s tours differ" %
                  (tours_differ.sum(), len(tours_differ)))
            print("differing survey_tours\n%s" % survey_tours[tours_differ])
            print("differing modeled_tours\n%s" %
                  non_mandatory_tours[columns][tours_differ])

        assert (not tours_differ.any())

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(
            non_mandatory_tours,
            label="non_mandatory_tour_frequency.non_mandatory_tours",
            warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(
            persons,
            label="non_mandatory_tour_frequency.annotated_persons",
            warn_if_empty=True)
Beispiel #19
0
def run_trip_destination(trips,
                         tours_merged,
                         estimator,
                         chunk_size,
                         trace_hh_id,
                         trace_label,
                         fail_some_trips_for_testing=False):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    want_sample_table
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------

    """

    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')
    network_los = inject.get_injectable('network_los')

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(
        trips.trip_num < trips.trip_count, 0)

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination,
                               trips.tour_id).astype(np.int64)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(np.int64)
    trips['destination'] = np.where(trips.outbound, tour_destination,
                                    tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    if estimator:
        # need to check or override non-intermediate trip destination
        # should check consistency of survey trips origin, destination with parent tour and subsequent/prior trip?
        # FIXME if not consistent, do we fail or override? (seems weird to override them to bad values?)

        # expect all the same trips
        survey_trips = estimator.get_survey_table('trips').sort_index()
        assert survey_trips.index.equals(trips.index)

        first = (survey_trips.trip_num == 1)
        last = (survey_trips.trip_num == trips.trip_count)

        # expect survey's outbound first trip origin to be same as half tour origin
        assert (
            survey_trips.origin[survey_trips.outbound
                                & first] == tour_origin[survey_trips.outbound
                                                        & first]).all()
        # expect outbound last trip destination to be same as half tour destination
        assert (survey_trips.destination[survey_trips.outbound & last] ==
                tour_destination[survey_trips.outbound & last]).all()

        # expect inbound first trip origin to be same as half tour destination
        assert (survey_trips.origin[~survey_trips.outbound & first] ==
                tour_destination[~survey_trips.outbound & first]).all()
        # expect inbound last trip destination to be same as half tour origin
        assert (survey_trips.destination[~survey_trips.outbound & last] ==
                tour_origin[~survey_trips.outbound & last]).all()

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    redundant_cols = model_settings.get(
        'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS', [])
    if redundant_cols:
        tours_merged_cols = [
            c for c in tours_merged_cols if c not in redundant_cols
        ]

    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skim_hotel = SkimHotel(model_settings, network_los, trace_label)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix alows us to treat dataframe as virtual a 2-D array, indexed by zone_id, purpose
    # e.g. size_terms.get(df.dest_zone_id, df.purpose)
    # returns a series of size_terms for each chooser's dest_zone_id and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just zone_id index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST_COL_NAME']

    sample_list = []

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(
                trace_label, 'trip_num_%s' % trip_num)

            locals_dict = {'network_los': network_los}
            locals_dict.update(config.get_model_constants(model_settings))

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=locals_dict,
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label,
                        nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby(
                    'primary_purpose'):
                choices, destination_sample = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    want_logsums,
                    want_sample_table,
                    size_term_matrix,
                    skim_hotel,
                    estimator,
                    chunk_size,
                    trace_hh_id,
                    trace_label=tracing.extend_trace_label(
                        nth_trace_label, primary_purpose))

                choices_list.append(choices)
                if want_sample_table:
                    assert destination_sample is not None
                    sample_list.append(destination_sample)

            destinations_df = pd.concat(choices_list)

            if fail_some_trips_for_testing:
                if len(destinations_df) > 0:
                    destinations_df = destinations_df.drop(
                        destinations_df.index[0])

            failed_trip_ids = nth_trips.index.difference(destinations_df.index)
            if failed_trip_ids.any():
                logger.warning(
                    "%s sidelining %s trips without viable destination alternatives"
                    % (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids,
                          'origin'] = trips.loc[failed_trip_ids].origin.values

            if len(destinations_df) == 0:
                assert failed_trip_ids.all()
                logger.warning(
                    f"all {len(nth_trips)} {primary_purpose} trip_num {trip_num} trips failed"
                )

            if len(destinations_df) > 0:
                # - assign choices to this trip's destinations
                # if estimator, then the choices will already have been overridden by trip_destination_simulate
                # because we need to overwrite choices before any failed choices are suppressed
                assign_in_place(trips,
                                destinations_df.choice.to_frame('destination'))
                if want_logsums:
                    assert 'logsum' in destinations_df.columns
                    assign_in_place(
                        trips,
                        destinations_df.logsum.to_frame(logsum_column_name))

                # - assign choice to next trip's origin
                destinations_df.index = nth_trips.next_trip_id.reindex(
                    destinations_df.index)
                assign_in_place(trips,
                                destinations_df.choice.to_frame('origin'))

    del trips['next_trip_id']

    if len(sample_list) > 0:
        save_sample_df = pd.concat(sample_list)
    else:
        # this could happen if no intermediate trips, or if no saved sample desired
        save_sample_df = None

    return trips, save_sample_df
Beispiel #20
0
def compute_logsums(choosers,
                    tour_purpose,
                    logsum_settings, model_settings,
                    network_los,
                    chunk_size,
                    chunk_tag,
                    trace_label):
    """

    Parameters
    ----------
    choosers
    tour_purpose
    logsum_settings
    model_settings
    network_los
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.debug("Running compute_logsums with %d choosers" % choosers.shape[0])

    # compute_logsums needs to know name of dest column in interaction_sample
    orig_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # FIXME - are we ok with altering choosers (so caller doesn't have to set these)?
    assert ('in_period' not in choosers) and ('out_period' not in choosers)
    choosers['in_period'] = network_los.skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = network_los.skim_time_period_label(model_settings['OUT_PERIOD'])

    assert ('duration' not in choosers)
    choosers['duration'] = model_settings['IN_PERIOD'] - model_settings['OUT_PERIOD']

    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)

    logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)

    locals_dict = {}
    # model_constants can appear in expressions
    locals_dict.update(config.get_model_constants(logsum_settings))
    # constrained coefficients can appear in expressions
    locals_dict.update(coefficients)

    # setup skim keys
    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name, dest_key=dest_col_name,
                                           tod_key='out_period', segment_key='demographic_segment',
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name, dest_key=orig_col_name,
                                           tod_key='in_period', segment_key='demographic_segment',
                                           trace_label=trace_label, tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        locals_dict.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label)

    return logsums
Beispiel #21
0
def _schedule_tours(tours, persons_merged, alts, spec, logsum_tour_purpose,
                    model_settings, skims, timetable, window_id_col,
                    previous_tour, tour_owner_id_col, estimator,
                    tour_trace_label):
    """
    previous_tour stores values used to add columns that can be used in the spec
    which have to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule with unique timetable window_id_col
    persons_merged : DataFrame
        DataFrame of persons to be merged with tours containing attributes referenced
        by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent all possible time slots.
        tdd_interaction_dataset function will use timetable to filter them to omit
        unavailable alternatives
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
    model_settings : dict
    timetable : TimeTable
        timetable of timewidows for person (or subtour) with rows for tours[window_id_col]
    window_id_col : str
        column name from tours that identifies timetable owner (or None if tours index)
        - person_id for non/mandatory tours
        - parent_tour_id for subtours,
        - None (tours index) for joint_tours since every tour may have different participants)
    previous_tour: Series
        series with value of tdd_alt choice for last previous tour scheduled for
    tour_owner_id_col : str
        column name from tours that identifies 'owner' of this tour
        (person_id for non/mandatory tours, parent_tour_id for subtours,
        household_id for joint_tours)
    tour_trace_label

    Returns
    -------

    """

    logger.info("%s schedule_tours running %d tour choices" %
                (tour_trace_label, len(tours)))

    # merge persons into tours
    # avoid dual suffix for redundant columns names (e.g. household_id) that appear in both
    tours = pd.merge(tours,
                     persons_merged,
                     left_on='person_id',
                     right_index=True,
                     suffixes=('', '_y'))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - add explicit window_id_col for timetable owner if it is index
    # if no timetable window_id_col specified, then add index as an explicit column
    # (this is not strictly necessary but its presence makes code simpler in several places)
    if window_id_col is None:
        window_id_col = tours.index.name
        tours[window_id_col] = tours.index

    # timetable can't handle multiple tours per window_id
    assert not tours[window_id_col].duplicated().any()

    # - build interaction dataset filtered to include only available tdd alts
    # dataframe columns start, end , duration, person_id, tdd
    # indexed (not unique) on tour_id
    choice_column = TDD_CHOICE_COLUMN
    alt_tdd = tdd_interaction_dataset(tours, alts, timetable, choice_column,
                                      window_id_col, tour_trace_label)
    print(f"tours {tours.shape} alts {alts.shape}")
    chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    # - add logsums
    if logsum_tour_purpose:
        logsums = \
            compute_logsums(alt_tdd, tours, logsum_tour_purpose, model_settings, skims, tour_trace_label)
    else:
        logsums = 0
    alt_tdd['mode_choice_logsum'] = logsums

    # - merge in previous tour columns
    # adds start_previous and end_previous, joins on index
    tours = \
        tours.join(get_previous_tour_by_tourid(tours[tour_owner_id_col], previous_tour, alts))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - make choices
    locals_d = {'tt': timetable}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    preprocessor_settings = model_settings.get('ALTS_PREPROCESSOR', None)

    if preprocessor_settings and preprocessor_settings.get(
            logsum_tour_purpose):
        expressions.assign_columns(
            df=alt_tdd,
            model_settings=preprocessor_settings.get(logsum_tour_purpose),
            locals_dict=locals_d,
            trace_label=tour_trace_label)

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(tours)
        estimator.set_alt_id(choice_column)
        estimator.write_interaction_sample_alternatives(alt_tdd)

    choices = interaction_sample_simulate(tours,
                                          alt_tdd,
                                          spec,
                                          choice_column=choice_column,
                                          locals_d=locals_d,
                                          chunk_size=0,
                                          trace_label=tour_trace_label,
                                          estimator=estimator)

    # - update previous_tour and timetable parameters

    # update previous_tour (series with most recent previous tdd choices) with latest values
    previous_tour.loc[tours[tour_owner_id_col]] = choices.values

    # update timetable with chosen tdd footprints
    timetable.assign(tours[window_id_col], choices)

    return choices
Beispiel #22
0
def parking_location(trips, trips_merged, land_use, network_los, chunk_size,
                     trace_hh_id):
    """
    Given a set of trips, each trip needs to have a parking location if
    it is eligible for remote parking.
    """

    trace_label = 'parking_location'
    model_settings = config.read_model_settings('parking_location_choice.yaml')
    alt_destination_col_name = model_settings['ALT_DEST_COL_NAME']

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    trips_df = trips.to_frame()
    trips_merged_df = trips_merged.to_frame()
    land_use_df = land_use.to_frame()

    locals_dict = {'network_los': network_los}
    locals_dict.update(config.get_model_constants(model_settings))

    if preprocessor_settings:
        expressions.assign_columns(df=trips_merged_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    parking_locations, save_sample_df = run_parking_destination(
        model_settings,
        trips_merged_df,
        land_use_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
    )

    assign_in_place(trips_df,
                    parking_locations.to_frame(alt_destination_col_name))

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get(
            'PARKING_LOCATION_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
Beispiel #23
0
def iterate_location_choice(model_settings, persons_merged, persons,
                            households, network_los, estimator, chunk_size,
                            trace_hh_id, locutor, trace_label):
    """
    iterate run_location_choice updating shadow pricing until convergence criteria satisfied
    or max_iterations reached.

    (If use_shadow_pricing not enabled, then just iterate once)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    network_los : los.Network_LOS
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
    adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
    adds annotations to persons table
    """

    chunk_tag = trace_label

    # boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged_df = persons_merged.to_frame()

    persons_merged_df = persons_merged_df[
        persons_merged[chooser_filter_column]]

    persons_merged_df.sort_index(
        inplace=True
    )  # interaction_sample expects chooser index to be monotonic increasing

    # chooser segmentation allows different sets coefficients for e.g. different income_segments or tour_types
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    assert chooser_segment_column in persons_merged_df, \
        f"CHOOSER_SEGMENT_COLUMN '{chooser_segment_column}' not in persons_merged table."

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations
    assert not (spc.use_shadow_pricing and estimator)

    logger.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices_df, save_sample_df = run_location_choice(
            persons_merged_df,
            network_los,
            shadow_price_calculator=spc,
            want_logsums=logsum_column_name is not None,
            want_sample_table=want_sample_table,
            estimator=estimator,
            model_settings=model_settings,
            chunk_size=chunk_size,
            chunk_tag=chunk_tag,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label,
                                                   'i%s' % iteration))

        # choices_df is a pandas DataFrame with columns 'choice' and (optionally) 'logsum'
        if choices_df is None:
            break

        spc.set_choices(
            choices=choices_df['choice'],
            segment_ids=persons_merged_df[chooser_segment_column].reindex(
                choices_df.index))

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logging.info("%s converged after iteration %s" % (
                trace_label,
                iteration,
            ))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'],
                             spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'],
                             spc.modeled_size)

    persons_df = persons.to_frame()

    # add the choice values to the dest_choice_column in persons dataframe
    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    # names for location choice and (optional) logsums columns
    NO_DEST_ZONE = -1
    persons_df[dest_choice_column_name] = \
        choices_df['choice'].reindex(persons_df.index).fillna(NO_DEST_ZONE).astype(int)

    # add the dest_choice_logsum column to persons dataframe
    if logsum_column_name:
        persons_df[logsum_column_name] = \
            choices_df['logsum'].reindex(persons_df.index).astype('float')

    if save_sample_df is not None:
        # might be None for tiny samples even if sample_table_name was specified
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(
            choices_df)
        # lest they try to put school and workplace samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("dest choice sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label,
                                                   'annotate_persons'))

        pipeline.replace_table("persons", persons_df)

        if trace_hh_id:
            tracing.trace_df(persons_df, label=trace_label, warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label,
                                                   'annotate_households'))
        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    if logsum_column_name:
        tracing.print_summary(logsum_column_name,
                              choices_df['logsum'],
                              value_counts=True)

    return persons_df
def transit_pass_subsidy(persons_merged, persons, chunk_size, trace_hh_id):
    """
    Transit pass subsidy model.
    """

    trace_label = 'transit_pass_subsidy'
    model_settings_file_name = 'transit_pass_subsidy.yaml'

    choosers = persons_merged.to_frame()
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('transit_pass_subsidy')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='transit_pass_subsidy',
        estimator=estimator)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'transit_pass_subsidy')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['transit_pass_subsidy'] = choices.reindex(persons.index)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('transit_pass_subsidy',
                          persons.transit_pass_subsidy,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
Beispiel #25
0
def joint_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                          trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'

    model_settings_file_name = 'joint_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table(
        'joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label,
                joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a households joint tours each potentially different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=joint_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('joint_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    choices = vectorize_joint_tour_scheduling(joint_tours,
                                              joint_tour_participants,
                                              persons_merged,
                                              tdd_alts,
                                              timetable,
                                              spec=model_spec,
                                              model_settings=model_settings,
                                              estimator=estimator,
                                              chunk_size=chunk_size,
                                              trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):
            nth_participants = \
                joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

            estimator.log(
                "assign timetable for %s participants in %s tours with tour_num %s"
                % (len(nth_participants), len(nth_tours), tour_num))
            # - update timetables of all joint tour participants
            timetable.assign(nth_participants.person_id,
                             reindex(choices, nth_participants.tour_id))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
Beispiel #26
0
def work_from_home(persons_merged, persons, chunk_size, trace_hh_id):
    """
    This model predicts whether a person (worker) works from home. The output
    from this model is TRUE (if works from home) or FALSE (works away from home).
    The workplace location choice is overridden for workers who work from home
    and set to -1.

    """

    trace_label = 'work_from_home'
    model_settings_file_name = 'work_from_home.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('work_from_home')

    constants = config.get_model_constants(model_settings)
    work_from_home_alt = model_settings['WORK_FROM_HOME_ALT']

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    # - iterative what-if if specified
    iterations = model_settings.get('WORK_FROM_HOME_ITERATIONS', 1)
    iterations_coefficient_constant = model_settings.get(
        'WORK_FROM_HOME_COEFFICIENT_CONSTANT', None)
    iterations_target_percent = model_settings.get(
        'WORK_FROM_HOME_TARGET_PERCENT', None)
    iterations_target_percent_tolerance = model_settings.get(
        'WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE', None)

    for iteration in range(iterations):

        logger.info("Running %s with %d persons iteration %d", trace_label,
                    len(choosers), iteration)

        # re-read spec to reset substitution
        model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
        model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                                estimator)

        choices = simulate.simple_simulate(choosers=choosers,
                                           spec=model_spec,
                                           nest_spec=nest_spec,
                                           locals_d=constants,
                                           chunk_size=chunk_size,
                                           trace_label=trace_label,
                                           trace_choice_name='work_from_home',
                                           estimator=estimator)

        if iterations_target_percent is not None:
            current_percent = ((choices == work_from_home_alt).sum() /
                               len(choices))
            logger.info(
                "Running %s iteration %i current percent %f target percent %f",
                trace_label, iteration, current_percent,
                iterations_target_percent)

            if current_percent <= (iterations_target_percent +
                                   iterations_target_percent_tolerance
                                   ) and current_percent >= (
                                       iterations_target_percent -
                                       iterations_target_percent_tolerance):
                logger.info(
                    "Running %s iteration %i converged with coefficient %f",
                    trace_label, iteration,
                    coefficients_df.value[iterations_coefficient_constant])
                break

            else:
                new_value = np.log(
                    iterations_target_percent /
                    np.maximum(current_percent, 0.0001)
                ) + coefficients_df.value[iterations_coefficient_constant]
                coefficients_df.value[
                    iterations_coefficient_constant] = new_value
                logger.info(
                    "Running %s iteration %i new coefficient for next iteration %f",
                    trace_label, iteration, new_value)
                iteration = iteration + 1

    choices = (choices == work_from_home_alt)

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'work_from_home')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['work_from_home'] = choices.reindex(
        persons.index).fillna(0).astype(bool)
    persons[dest_choice_column_name] = np.where(
        persons.work_from_home is True, -1, persons[dest_choice_column_name])

    pipeline.replace_table("persons", persons)

    tracing.print_summary('work_from_home',
                          persons.work_from_home,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
def joint_tour_participation(
        tours, persons_merged,
        chunk_size,
        trace_hh_id):
    """
    Predicts for each eligible person to participate or not participate in each joint tour.
    """
    trace_label = 'joint_tour_participation'
    model_settings_file_name = 'joint_tour_participation.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate

    estimator = estimation.manager.begin_estimation('joint_tour_participation')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(candidates)

    # add tour-based chunk_id so we can chunk all trips in tour together
    assert 'chunk_id' not in candidates.columns
    unique_household_ids = candidates.household_id.unique()
    household_chunk_ids = pd.Series(range(len(unique_household_ids)), index=unique_household_ids)
    candidates['chunk_id'] = reindex(household_chunk_ids, candidates.household_id)

    choices = simulate.simple_simulate_by_chunk_id(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser,
        estimator=estimator)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec"
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    if estimator:
        estimator.write_choices(choices)

        # we override the 'participate' boolean series, instead of raw alternative index in 'choices' series
        # its value depends on whether the candidate's 'participant_id' is in the joint_tour_participant index
        survey_participants_df = estimator.get_survey_table('joint_tour_participants')
        participate = pd.Series(choices.index.isin(survey_participants_df.index.values), index=choices.index)

        # but estimation software wants to know the choices value (alternative index)
        choices = participate.replace({True: PARTICIPATE_CHOICE, False: 1-PARTICIPATE_CHOICE})
        # estimator.write_override_choices(participate)  # write choices as boolean participate
        estimator.write_override_choices(choices)  # write choices as int alt indexes

        estimator.end_estimation()

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")

        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
def joint_tour_composition(tours, households, persons, chunk_size,
                           trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).
    """
    trace_label = 'joint_tour_composition'
    model_settings_file_name = 'joint_tour_composition.yaml'

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('joint_tour_composition')

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" %
                joint_tours.shape[0])

    # - run preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    joint_tours_merged = pd.merge(joint_tours,
                                  households,
                                  left_on='household_id',
                                  right_index=True,
                                  how='left')

    # - simple_simulate
    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(joint_tours_merged)

    choices = simulate.simple_simulate(choosers=joint_tours_merged,
                                       spec=model_spec,
                                       nest_spec=nest_spec,
                                       locals_d=constants,
                                       chunk_size=chunk_size,
                                       trace_label=trace_label,
                                       trace_choice_name='composition',
                                       estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'composition')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition',
                          joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
Beispiel #29
0
def run_tour_scheduling(model_name, chooser_tours, persons_merged, tdd_alts,
                        tour_segment_col, chunk_size, trace_hh_id):

    trace_label = model_name
    model_settings_file_name = f'{model_name}.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    if 'LOGSUM_SETTINGS' in model_settings:
        logsum_settings = config.read_model_settings(
            model_settings['LOGSUM_SETTINGS'])
        logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    else:
        logsum_columns = []

    # - filter chooser columns for both logsums and simulate
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [
        c for c in model_columns if c not in logsum_columns
    ]

    persons_merged = expressions.filter_chooser_columns(
        persons_merged, chooser_columns)

    timetable = inject.get_injectable("timetable")

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_d = {'tt': timetable}
        locals_d.update(config.get_model_constants(model_settings))

        expressions.assign_columns(df=chooser_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    estimators = {}
    if 'TOUR_SPEC_SEGMENTS' in model_settings:
        # load segmented specs
        spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})
        specs = {}
        for spec_segment_name, spec_settings in spec_segment_settings.items():

            bundle_name = f'{model_name}_{spec_segment_name}'

            # estimator for this tour_segment
            estimator = estimation.manager.begin_estimation(
                model_name=bundle_name, bundle_name=bundle_name)

            spec_file_name = spec_settings['SPEC']
            model_spec = simulate.read_model_spec(file_name=spec_file_name)
            coefficients_df = simulate.read_model_coefficients(spec_settings)
            specs[spec_segment_name] = simulate.eval_coefficients(
                model_spec, coefficients_df, estimator)

            if estimator:
                estimators[spec_segment_name] = estimator  # add to local list
                estimator.write_model_settings(model_settings,
                                               model_settings_file_name)
                estimator.write_spec(spec_settings)
                estimator.write_coefficients(coefficients_df, spec_settings)

        # - spec dict segmented by primary_purpose
        tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
        tour_segments = {}
        for tour_segment_name, spec_segment_name in tour_segment_settings.items(
        ):
            tour_segments[tour_segment_name] = {}
            tour_segments[tour_segment_name][
                'spec_segment_name'] = spec_segment_name
            tour_segments[tour_segment_name]['spec'] = specs[spec_segment_name]
            tour_segments[tour_segment_name]['estimator'] = estimators.get(
                spec_segment_name)

        # default tour_segment_col to 'tour_type' if segmented spec and tour_segment_col not specified
        if tour_segment_col is None and tour_segments:
            tour_segment_col = 'tour_type'

    else:
        # unsegmented spec
        assert 'SPEC_SEGMENTS' not in model_settings
        assert 'TOUR_SPEC_SEGMENTS' not in model_settings
        assert tour_segment_col is None

        estimator = estimation.manager.begin_estimation(model_name)

        spec_file_name = model_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(model_settings)
        model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                                estimator)

        if estimator:
            estimators[None] = estimator  # add to local list
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name)
            estimator.write_spec(model_settings)
            estimator.write_coefficients(coefficients_df, model_settings)

        # - non_mandatory tour scheduling is not segmented by tour type
        tour_segments = {'spec': model_spec, 'estimator': estimator}

    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    logger.info(f"Running {model_name} with %d tours", len(chooser_tours))
    choices = vts.vectorize_tour_scheduling(chooser_tours,
                                            persons_merged,
                                            tdd_alts,
                                            timetable,
                                            tour_segments=tour_segments,
                                            tour_segment_col=tour_segment_col,
                                            model_settings=model_settings,
                                            chunk_size=chunk_size,
                                            trace_label=trace_label)

    if estimators:
        # overrride choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():
            if spec_segment_name:
                model_choices = choices[(
                    chooser_tours.tour_type == spec_segment_name)]
            else:
                model_choices = choices

            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(
                model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in chooser_tours.groupby('tour_num',
                                                         sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'),
                       tdd_alts,
                       left_on=['tdd'],
                       right_index=True,
                       how='left')

    return choices
Beispiel #30
0
def free_parking(persons_merged, persons, chunk_size, trace_hh_id):
    """

    """

    trace_label = 'free_parking'
    model_settings_file_name = 'free_parking.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('free_parking')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df,
                                            estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work',
        estimator=estimator)

    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons',
                                              'free_parking_at_work')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['free_parking_at_work'] = choices.reindex(
        persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking',
                          persons.free_parking_at_work,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)