Example #1
def compute_utilities_for_attribute_tuple(network_los, scalar_attributes, data, chunk_size, trace_label):

    # scalar_attributes is a dict of attribute name/value pairs for this combination
    # (e.g. {'demographic_segment': 0, 'tod': 'AM', 'access_mode': 'walk'})

    logger.info(f"{trace_label} scalar_attributes: {scalar_attributes}")

    uid_calculator = network_los.tvpb.uid_calculator

    attributes_as_columns = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])
    model_settings = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')
    model_constants = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS').copy()
    model_constants.update(scalar_attributes)

    data = data.reshape(uid_calculator.fully_populated_shape)

    # get od skim_offset dataframe with uid index corresponding to scalar_attributes
    choosers_df = uid_calculator.get_od_dataframe(scalar_attributes)

    # choosers_df is pretty big and was custom-made for compute_utilities, but we don't need to
    # chunk_log it since it is created outside of adaptive_chunked_choosers and so will show up in the baseline
    assert not chunk.chunk_logging()  # otherwise we should chunk_log this

    chunk_tag = 'initialize_tvpb'  # all attribute_combinations can use same cached data for row_size calc

    for i, chooser_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers(choosers_df, chunk_size, trace_label, chunk_tag=chunk_tag):
        # we should count choosers_df as chunk overhead since it's pretty big and was custom-made for compute_utilities
        assert chooser_chunk._is_view  # otherwise copying it is wasteful
        chooser_chunk = chooser_chunk.copy()
        chunk.log_df(trace_label, 'attribute_chooser_chunk', chooser_chunk)

        # add any attribute columns specified as column attributes in settings (the rest will be scalars in locals_dict)
        for attribute_name in attributes_as_columns:
            chooser_chunk[attribute_name] = scalar_attributes[attribute_name]

        chunk.log_df(trace_label, 'attribute_chooser_chunk', chooser_chunk)

        utilities_df = \
            pathbuilder.compute_utilities(network_los,
                                          model_settings=model_settings,
                                          choosers=chooser_chunk,
                                          model_constants=model_constants,
                                          trace_label=trace_label)

        chunk.log_df(trace_label, 'utilities_df', utilities_df)

        assert len(utilities_df) == len(chooser_chunk)
        assert len(utilities_df.columns) == data.shape[1]
        assert not any_uninitialized(utilities_df.values)

        data[chooser_chunk.index.values, :] = utilities_df.values

        del chooser_chunk
        chunk.log_df(trace_label, 'attribute_chooser_chunk', None)

    logger.debug(f"{trace_label} updated utilities")
Example #2
def compute_accessibility(land_use, accessibility, network_los, chunk_size,
                          trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(
        config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()
    if len(accessibility_df.columns) > 0:
        logger.warning(
            f"accessibility table is not empty. Columns: {list(accessibility_df.columns)}"
        )
        raise RuntimeError("accessibility table is not empty.")

    constants = config.get_model_constants(model_settings)

    # only include the land_use columns needed by spec, as specified by land_use_columns model_setting
    land_use_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()
    land_use_df = land_use_df[land_use_columns]

    logger.info(
        f"Running {trace_label} with {len(accessibility_df.index)} orig zones and {len(land_use_df)} dest zones"
    )

    accessibilities_list = []

    for i, chooser_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(accessibility_df, chunk_size, trace_label):

        accessibilities = \
            compute_accessibilities_for_zones(chooser_chunk, land_use_df, assignment_spec,
                                              constants, network_los, trace_od, trace_label)
        accessibilities_list.append(accessibilities)

    accessibility_df = pd.concat(accessibilities_list)

    logger.info(
        f"{trace_label} computed accessibilities {accessibility_df.shape}")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
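
A toy illustration of the accessibility measure the docstring describes: employment at each destination weighted by a decay function of round-trip travel time, summed over destinations, then logged. The decay coefficient and the log1p form (which keeps the minimum at zero) are illustrative assumptions, not the actual expressions in accessibility_spec.

import numpy as np

emp = np.array([100.0, 500.0, 50.0])               # employment by dest zone
round_trip_time = np.array([[10.0, 30.0, 60.0],    # orig zone 0
                            [40.0, 5.0, 20.0]])    # orig zone 1
dispersion = 0.05                                  # assumed decay coefficient

weighted = emp * np.exp(-dispersion * round_trip_time)  # decay-weighted jobs
accessibility = np.log1p(weighted.sum(axis=1))          # one value per origin zone
print(accessibility)  # larger where jobs are close in travel-time terms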
Example #3
def compute_utilities_for_attribute_tuple(network_los, scalar_attributes,
                                          data, chunk_size, trace_label):

    # scalar_attributes is a dict of attribute name/value pairs for this combination
    # (e.g. {'demographic_segment': 0, 'tod': 'AM', 'access_mode': 'walk'})

    logger.info(f"{trace_label} scalar_attributes: {scalar_attributes}")

    uid_calculator = network_los.tvpb.uid_calculator

    attributes_as_columns = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])
    model_settings = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')
    model_constants = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS').copy()
    model_constants.update(scalar_attributes)

    data = data.reshape(uid_calculator.fully_populated_shape)

    # get od skim_offset dataframe with uid index corresponding to scalar_attributes
    choosers_df = uid_calculator.get_od_dataframe(scalar_attributes)

    row_size = chunk_size and initialize_tvpb_calc_row_size(
        choosers_df, network_los, trace_label)
    for i, chooser_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers(choosers_df, chunk_size, row_size, trace_label):

        # we should count choosers_df as chunk overhead since it's pretty big and was custom-made for compute_utilities
        # (call log_df from inside yield loop so it is visible to adaptive_chunked_choosers chunk_log)
        chunk.log_df(trace_label, 'choosers_df', choosers_df)

        # add any attribute columns specified as column attributes in settings (the rest will be scalars in locals_dict)
        for attribute_name in attributes_as_columns:
            chooser_chunk[attribute_name] = scalar_attributes[attribute_name]

        chunk.log_df(trace_label, 'chooser_chunk', chooser_chunk)

        utilities_df = \
            pathbuilder.compute_utilities(network_los,
                                          model_settings=model_settings,
                                          choosers=chooser_chunk,
                                          model_constants=model_constants,
                                          trace_label=trace_label)

        chunk.log_df(trace_label, 'utilities_df', utilities_df)

        assert len(utilities_df) == len(chooser_chunk)
        assert len(utilities_df.columns) == data.shape[1]
        assert not any_uninitialized(utilities_df.values)

        data[chooser_chunk.index.values, :] = utilities_df.values

    logger.debug(f"{trace_label} updated utilities")
Example #4
def run_tour_scheduling_probabilistic(tours_df, scheduling_probs,
                                      probs_join_cols, depart_alt_base,
                                      chunk_size, trace_label, trace_hh_id):
    """Make probabilistic tour scheduling choices in chunks

    Parameters
    ----------
    tours_df : pandas.DataFrame
        table of tours
    scheduling_probs : pandas.DataFrame
        Probability lookup table for tour departure and return times
    probs_join_cols : str or list of strs
        Columns to use for merging probability lookup table with tours table
    depart_alt_base : int
        int to add to the probs column index to get the time period it represents,
        e.g. depart_alt_base = 5 means the first column (column 0) represents 5 am
    chunk_size : int
        size of chooser chunks, set in main settings.yaml
    trace_label : str
        label to append to tracing logs and table names
    trace_hh_id : int
        household ID to trace

    Returns
    -------
    pandas.Series
        series of chosen alternative indices for each chooser
    """
    result_list = []
    for i, chooser_chunk, chunk_trace_label in chunk.adaptive_chunked_choosers(
            tours_df, chunk_size, trace_label, trace_label):
        choices = ps.make_scheduling_choices(
            chooser_chunk,
            'departure',
            scheduling_probs,
            probs_join_cols,
            depart_alt_base,
            first_trip_in_leg=False,
            report_failed_trips=True,
            trace_label=chunk_trace_label,
            trace_hh_id=trace_hh_id,
            trace_choice_col_name='depart_return',
            clip_earliest_latest=False)
        result_list.append(choices)

    choices = pd.concat(result_list)
    return choices
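
A hedged sketch of the probabilistic choice that ps.make_scheduling_choices performs per chunk: each chooser row carries a vector of departure probabilities, a uniform draw is placed against the cumulative distribution, and the winning column index offset by depart_alt_base becomes the depart hour. The column layout and depart_alt_base = 5 are assumptions for illustration, not the real lookup table.

import numpy as np
import pandas as pd

probs = pd.DataFrame([[0.2, 0.5, 0.3],
                      [0.6, 0.3, 0.1]])   # one row of depart probs per tour
depart_alt_base = 5                       # column 0 represents 5 am

rng = np.random.default_rng(seed=0)
draws = rng.random(len(probs))
# first column whose cumulative probability exceeds the uniform draw
chosen_col = (probs.cumsum(axis=1).to_numpy() > draws[:, None]).argmax(axis=1)
depart_hour = chosen_col + depart_alt_base
print(depart_hour)  # e.g. array([6, 5])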
Example #5
def run_trip_purpose(trips_df, estimator, chunk_size, trace_hh_id,
                     trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    For each intermediate stop on a tour (i.e. a trip other than the last trip outbound or inbound),
    each trip is assigned a purpose based on an observed frequency distribution.

    The distribution should always be segmented by tour purpose and tour direction. By default it is also
    segmented by person type. The join columns can be overridden using the "probs_join_cols" parameter in
    the model settings. The model will attempt to segment by trip depart time as well if
    depart time ranges are specified in the probability lookup table.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    # uniform across trip_purpose
    chunk_tag = 'trip_purpose'

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    probs_join_cols = model_settings.get('probs_join_cols', PROBS_JOIN_COLUMNS)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name),
                             comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # estimator.write_coefficients(coefficients_df, model_settings)

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'work', 'home'),
                        index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(df=trips_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    use_depart_time = model_settings.get('use_depart_time', True)

    for i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(trips_df, chunk_size, chunk_tag, trace_label):
        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            estimator,
            probs_join_cols=probs_join_cols,
            use_depart_time=use_depart_time,
            trace_hh_id=trace_hh_id,
            trace_label=chunk_trace_label)

        result_list.append(choices)

        chunk.log_df(trace_label, 'result_list', result_list)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
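
A minimal sketch of what choose_intermediate_trip_purpose does with the probability spec: join each trip to its probability row on the segmentation columns, then sample a purpose from that row. The purpose column names and the toy data below are assumptions; the real spec is read from trip_purpose_probs.csv.

import numpy as np
import pandas as pd

probs_spec = pd.DataFrame({
    'primary_purpose': ['work', 'shopping'],
    'outbound': [True, True],
    'escort': [0.1, 0.3], 'shopping': [0.4, 0.6], 'othmaint': [0.5, 0.1],
})
trips = pd.DataFrame({'primary_purpose': ['work', 'work', 'shopping'],
                      'outbound': [True, True, True]},
                     index=pd.Index([101, 102, 103], name='trip_id'))

# join each trip to its probability row on the segmentation columns
merged = trips.reset_index().merge(probs_spec,
                                   on=['primary_purpose', 'outbound'],
                                   how='left').set_index('trip_id')
purpose_cols = ['escort', 'shopping', 'othmaint']
cum = merged[purpose_cols].cumsum(axis=1).to_numpy()
draws = np.random.default_rng(0).random(len(merged))
choice_idx = (cum > draws[:, None]).argmax(axis=1)
purpose = pd.Series(np.take(purpose_cols, choice_idx), index=merged.index)
print(purpose)  # purpose (str) indexed by trip_id, like run_trip_purpose returns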
Example #6
def run_trip_scheduling_choice(spec, tours, skims, locals_dict, chunk_size,
                               trace_hh_id, trace_label):

    NUM_TOUR_LEGS = 3
    trace_label = tracing.extend_trace_label(trace_label,
                                             'interaction_sample_simulate')

    # FIXME: The duration, start, and end should be ints well before we get here...
    tours[TOUR_DURATION_COLUMN] = tours[TOUR_DURATION_COLUMN].astype(np.int8)

    # Setup boolean columns to make it easier to identify
    # intermediate stops later in the model.
    tours[HAS_OB_STOPS] = tours[NUM_OB_STOPS] >= 1
    tours[HAS_IB_STOPS] = tours[NUM_IB_STOPS] >= 1

    # Calculate a matrix with the appropriate alternative sizes
    # based on the total tour duration. This is used to calculate
    # chunk sizes.
    max_duration = tours[TOUR_DURATION_COLUMN].max()
    alt_sizes = generate_alternative_sizes(max_duration, NUM_TOUR_LEGS)

    # Assign the number of tour leg schedule alternatives for each tour
    tours[NUM_ALTERNATIVES] = 1
    tours.loc[tours[HAS_OB_STOPS] != tours[HAS_IB_STOPS],
              NUM_ALTERNATIVES] = tours[TOUR_DURATION_COLUMN] + 1
    tours.loc[tours[HAS_OB_STOPS] & tours[HAS_IB_STOPS], NUM_ALTERNATIVES] = \
        tours.apply(lambda x: alt_sizes[1, x.duration], axis=1)

    # If there are no intermediate stops on the tour, then the main leg duration
    # equals the tour duration and the intermediate durations are zero
    tours.loc[~tours[HAS_OB_STOPS] & ~tours[HAS_IB_STOPS],
              MAIN_LEG_DURATION] = tours[TOUR_DURATION_COLUMN]
    tours.loc[~tours[HAS_OB_STOPS] & ~tours[HAS_IB_STOPS],
              [IB_DURATION, OB_DURATION]] = 0

    # We only need to determine schedules for tours with intermediate stops
    indirect_tours = tours.loc[tours[HAS_OB_STOPS] | tours[HAS_IB_STOPS]]

    if len(indirect_tours) > 0:

        # Iterate through the chunks
        result_list = []
        for i, choosers, chunk_trace_label in \
                chunk.adaptive_chunked_choosers(indirect_tours, chunk_size, trace_label):

            # Sort the choosers and get the schedule alternatives
            choosers = choosers.sort_index()
            schedules = generate_schedule_alternatives(choosers).sort_index()

            # Assuming we did the max_alt_size calculation correctly,
            # we should get the same sizes here.
            assert choosers[NUM_ALTERNATIVES].sum() == schedules.shape[0]

            # Run the simulation
            choices = _interaction_sample_simulate(
                choosers=choosers,
                alternatives=schedules,
                spec=spec,
                choice_column=SCHEDULE_ID,
                allow_zero_probs=True,
                zero_prob_choice_val=-999,
                log_alt_losers=False,
                want_logsums=False,
                skims=skims,
                locals_d=locals_dict,
                trace_label=chunk_trace_label,
                trace_choice_name='trip_schedule_stage_1',
                estimator=None)

            assert len(choices.index) == len(choosers.index)

            choices = schedules[schedules[SCHEDULE_ID].isin(choices)]

            result_list.append(choices)

            chunk.log_df(trace_label, 'result_list', result_list)

        # FIXME: this will require 2X RAM
        # if necessary, could append to hdf5 store on disk:
        # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
        if len(result_list) > 1:
            choices = pd.concat(result_list)

        assert len(choices.index) == len(indirect_tours.index)

        # The choices here are only the indirect tours, so the durations
        # need to be updated on the main tour dataframe.
        tours.update(choices[[MAIN_LEG_DURATION, OB_DURATION, IB_DURATION]])

    # Cleanup data types and drop temporary columns
    tours[[MAIN_LEG_DURATION, OB_DURATION, IB_DURATION]] = \
        tours[[MAIN_LEG_DURATION, OB_DURATION, IB_DURATION]].astype(np.int8)
    tours = tours.drop(columns=TEMP_COLS)

    return tours
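
The alternative counts above follow a stars-and-bars pattern: a tour of duration d split into k non-negative leg durations has C(d + k - 1, k - 1) schedule alternatives. That matches the duration + 1 alternatives assigned to tours with stops in only one direction (two free legs) and the alt_sizes lookup for tours with stops in both directions (three legs), assuming generate_alternative_sizes implements this count, which the surrounding code suggests but does not confirm.

from math import comb

def num_schedule_alternatives(duration, num_free_legs):
    # ways to split `duration` into `num_free_legs` non-negative leg durations
    return comb(duration + num_free_legs - 1, num_free_legs - 1)

assert num_schedule_alternatives(6, 2) == 7   # duration + 1, one direction with stops
print(num_schedule_alternatives(6, 3))        # 28 ways to split 6 among three legs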
Example #7
def schedule_tours(tours, persons_merged, alts, spec, logsum_tour_purpose,
                   model_settings, timetable, timetable_window_id_col,
                   previous_tour, tour_owner_id_col, estimator, chunk_size,
                   tour_trace_label):
    """
    chunking wrapper for _schedule_tours

    While interaction_sample_simulate provides chunking support, the merged tours, persons
    dataframe and the tdd_interaction_dataset are very big, so we want to create them inside
    the chunking loop to minimize memory footprint. So we implement the chunking loop here,
    and pass a chunk_size of 0 to interaction_sample_simulate to disable its chunking support.

    """

    if not tours.index.is_monotonic_increasing:
        logger.info(
            "schedule_tours %s tours not monotonic_increasing - sorting df",
            tour_trace_label)
        tours = tours.sort_index()

    logger.info("%s schedule_tours running %d tour choices" %
                (tour_trace_label, len(tours)))

    # no more than one tour per timetable_window per call
    if timetable_window_id_col is None:
        assert not tours.index.duplicated().any()
    else:
        assert not tours[timetable_window_id_col].duplicated().any()

    if 'LOGSUM_SETTINGS' in model_settings:
        # we need skims to calculate tvpb skim overhead in 3_ZONE systems for use by calc_rows_per_chunk
        skims = skims_for_logsums(logsum_tour_purpose, model_settings,
                                  tour_trace_label)
    else:
        skims = None

    row_size = chunk_size and \
        tour_scheduling_calc_row_size(tours, persons_merged, alts, skims, spec, model_settings, tour_trace_label)

    result_list = []
    for i, chooser_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers(tours, chunk_size, row_size, tour_trace_label):

        choices = _schedule_tours(chooser_chunk,
                                  persons_merged,
                                  alts,
                                  spec,
                                  logsum_tour_purpose,
                                  model_settings,
                                  skims,
                                  timetable,
                                  timetable_window_id_col,
                                  previous_tour,
                                  tour_owner_id_col,
                                  estimator,
                                  tour_trace_label=chunk_trace_label)

        result_list.append(choices)

        mem.force_garbage_collect()

    # FIXME: this will require 2X RAM
    # if necessary, could append to hdf5 store on disk:
    # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
    if len(result_list) > 1:
        choices = pd.concat(result_list)

    assert len(choices.index) == len(tours.index)

    return choices
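
A sketch of the wrapper pattern the docstring describes: the outer function owns the chunking loop and builds the big merged tables per chunk, while the inner simulate call receives chunk_size=0 so it never chunks again. The function names below are placeholders, not the real interaction_sample_simulate API.

import pandas as pd

def inner_simulate(choosers, chunk_size=0):
    assert chunk_size == 0, "inner chunking disabled; the outer loop owns chunking"
    return pd.Series(1, index=choosers.index)  # stand-in for choices

def outer_schedule(choosers_df, rows_per_chunk):
    result_list = []
    for start in range(0, len(choosers_df), rows_per_chunk):
        chooser_chunk = choosers_df.iloc[start:start + rows_per_chunk]
        # the expensive merged tables would be built here, per chunk, and freed
        # before the next iteration to keep the memory footprint bounded
        result_list.append(inner_simulate(chooser_chunk, chunk_size=0))
    return pd.concat(result_list)

choices = outer_schedule(pd.DataFrame(index=range(10)), rows_per_chunk=4)
assert len(choices) == 10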
Example #8
def run_trip_purpose(trips_df, chunk_size, trace_hh_id, trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    For each intermediate stop on a tour (i.e. a trip other than the last trip outbound or inbound),
    each trip is assigned a purpose based on an observed frequency distribution.

    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    model_settings = config.read_model_settings('trip_purpose.yaml')
    probs_spec = trip_purpose_probs()

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'Work', 'Home'),
                        index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(df=trips_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    row_size = chunk_size and trip_purpose_calc_row_size(
        trips_df, probs_spec, trace_label)

    for i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(trips_df, chunk_size, row_size, trace_label):

        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            trace_hh_id,
            trace_label=chunk_trace_label)

        result_list.append(choices)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
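
A small illustration of the last-trip masking used above: trip_num equals trip_count exactly on the final trip of each tour direction, so the mask splits trips into "last" (purpose inherited, or home/work) and "intermediate" (purpose drawn from the probability table). Toy data with assumed column values:

import pandas as pd

trips = pd.DataFrame({
    'trip_num':   [1, 2, 3, 1, 2],
    'trip_count': [3, 3, 3, 2, 2],
    'outbound':   [True, True, True, False, False],
    'primary_purpose': ['work'] * 5,
})

last_trip = trips.trip_num == trips.trip_count
print(trips[last_trip & trips.outbound])    # gets primary_purpose directly
print(trips[last_trip & ~trips.outbound])   # gets home (or work for atwork)
print(trips[~last_trip])                    # assigned from the probability table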
Example #9
def run_trip_purpose(
        trips_df,
        estimator,
        chunk_size,
        trace_hh_id,
        trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    For each intermediate stop on a tour (i.e. a trip other than the last trip outbound or inbound),
    each trip is assigned a purpose based on an observed frequency distribution.

    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name), comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings, model_settings_file_name)
        # estimator.write_coefficients(coefficients_df, model_settings)

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    # FIXME should be lower case for consistency?
    purpose = pd.Series(np.where(purpose == 'atwork', 'Work', 'Home'), index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(
            df=trips_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    row_size = chunk_size and trip_purpose_calc_row_size(trips_df, probs_spec, trace_label)

    for i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(trips_df, chunk_size, row_size, trace_label):

        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            estimator,
            trace_hh_id,
            trace_label=chunk_trace_label)

        result_list.append(choices)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
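
The np.where + pd.Series pattern used for inbound last trips, in isolation: np.where returns a bare ndarray, so re-wrapping it with the original index keeps the result aligned to trip_id. Toy data for illustration.

import numpy as np
import pandas as pd

purpose = pd.Series(['atwork', 'shopping', 'atwork'],
                    index=pd.Index([7, 8, 9], name='trip_id'))
mapped = pd.Series(np.where(purpose == 'atwork', 'Work', 'Home'),
                   index=purpose.index)
print(mapped)  # Work/Home labels, still indexed by trip_id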