Example 1
def auto_ownership_simulate(households_merged, auto_ownership_spec,
                            auto_ownership_settings, trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """

    logger.info("Running auto_ownership_simulate with %d households" %
                len(households_merged))

    nest_spec = config.get_logit_model_settings(auto_ownership_settings)
    constants = config.get_model_constants(auto_ownership_settings)

    choices = asim.simple_simulate(
        choosers=households_merged.to_frame(),
        spec=auto_ownership_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        trace_label=trace_hh_id and 'auto_ownership',
        trace_choice_name='auto_ownership')

    tracing.print_summary('auto_ownership', choices, value_counts=True)

    inject.add_column('households', 'auto_ownership', choices)

    pipeline.add_dependent_columns('households', 'households_autoown')

    if trace_hh_id:
        trace_columns = ['auto_ownership'] + \
            inject.get_table('households_autoown').columns
        tracing.trace_df(inject.get_table('households').to_frame(),
                         label='auto_ownership',
                         columns=trace_columns,
                         warn_if_empty=True)
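A note on simple_simulate: it evaluates the spec's utility expressions for each chooser and makes a multinomial logit draw per row. Below is a minimal sketch of that logit core, with toy utilities standing in for the evaluated spec (the values and alternative names are invented, not the actual activitysim machinery):

import numpy as np
import pandas as pd

# toy utilities: one row per household, one column per auto ownership alternative
utilities = pd.DataFrame([[0.0, 1.2, 0.8, -0.5],
                          [0.0, 0.3, 1.5, 0.9]],
                         index=[101, 102],
                         columns=['cars0', 'cars1', 'cars2', 'cars3'])

# multinomial logit probabilities: exp(U) normalized across alternatives
exp_utils = np.exp(utilities)
probs = exp_utils.div(exp_utils.sum(axis=1), axis=0)

# monte carlo draw: first alternative whose cumulative probability exceeds the draw
draws = np.random.random(len(probs))
positions = (probs.cumsum(axis=1).values > draws[:, None]).argmax(axis=1)
choices = pd.Series(probs.columns[positions], index=probs.index)
print(choices)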
Example 2
def add_results_to_zones(results, zones, zone_data_table):
    # since zones_data_df is local in buffer.py, it can come back with extra columns:
    del_cols = list(
        set(zones.columns) - set(zone_data_table.to_frame().columns))
    zones.drop(del_cols, axis=1, inplace=True)

    for column in results.columns:
        inject.add_column('zone_data', column, results[column])
Example 3
def zones_with_network_nodes(zone_data, network, settings):
    zones_df = zone_data.to_frame()

    # attach the node_id of the nearest network node to each zone
    if 'net_node_id' not in zones_df.columns:
        zones_df['net_node_id'] = \
            network.get_node_ids(zones_df[settings['zones_lon']],
                                 zones_df[settings['zones_lat']])
        inject.add_column('zone_data', 'net_node_id', zones_df['net_node_id'])

    return zones_df
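get_node_ids here is the pandana Network API, which snaps point coordinates to their nearest network nodes. A minimal usage sketch, assuming pandana and a toy two-node network (the zone coordinates are invented):

import pandas as pd
import pandana

# toy network: two nodes joined by a single edge
nodes = pd.DataFrame({'x': [0.0, 1.0], 'y': [0.0, 0.0]}, index=[0, 1])
edges = pd.DataFrame({'from': [0], 'to': [1], 'weight': [1.0]})
net = pandana.Network(nodes.x, nodes.y,
                      edges['from'], edges['to'], edges[['weight']])

# snap each zone centroid to the nearest network node
zones_df = pd.DataFrame({'lon': [0.1, 0.9], 'lat': [0.05, -0.02]})
zones_df['net_node_id'] = net.get_node_ids(zones_df['lon'], zones_df['lat'])
print(zones_df)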
Example 4
def mandatory_tour_frequency(persons_merged,
                             mandatory_tour_frequency_spec,
                             mandatory_tour_frequency_settings,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """

    trace_label = 'mandatory_tour_frequency'

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    choices = simulate.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices, value_counts=True)

    inject.add_column("persons", "mandatory_tour_frequency", choices)

    create_mandatory_tours(trace_hh_id)

    # add mandatory_tour-dependent columns (e.g. tour counts) to persons
    pipeline.add_dependent_columns("persons", "persons_mtf")

    if trace_hh_id:
        trace_columns = ['mandatory_tour_frequency']
        tracing.trace_df(inject.get_table('persons').to_frame(),
                         label="mandatory_tour_frequency.persons",
                         # columns=trace_columns,
                         warn_if_empty=True)
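The "convert indexes to alternative names" step above relies on simple_simulate returning positional alternative indexes while the spec's columns carry the alternative names. A minimal sketch of the conversion, on toy data:

import pandas as pd

# toy spec: rows are utility expressions, columns are the alternatives
spec = pd.DataFrame([[1.0, 0.5, 0.2]],
                    columns=['work1', 'school1', 'work_and_school'],
                    index=['util_expression'])

# toy choices: positional column indexes as returned by the simulator
choices = pd.Series([0, 2, 1], index=[101, 102, 103])

# map positions to alternative names, preserving the chooser index
named = pd.Series(spec.columns[choices.values], index=choices.index)
print(named)  # 101 -> work1, 102 -> work_and_school, 103 -> school1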
Example 5
def integerize_final_seed_weights(settings, crosswalk, control_spec, incidence_table):

    crosswalk_df = crosswalk.to_frame()
    incidence_df = incidence_table.to_frame()
    control_spec = control_spec.to_frame()

    seed_geography = settings.get('seed_geography')
    seed_controls_df = get_control_table(seed_geography)

    seed_weights_df = get_weight_table(seed_geography)

    # FIXME - I assume we want to integerize using meta controls too?
    control_cols = control_spec.target
    assert (seed_controls_df.columns == control_cols).all()

    # determine master_control_index if specified in settings
    total_hh_control_col = settings.get('total_hh_control')

    # run balancer for each seed geography
    weight_list = []

    seed_ids = crosswalk_df[seed_geography].unique()
    for seed_id in seed_ids:

        logger.info("integerize_final_seed_weights seed id %s" % seed_id)

        # slice incidence rows for this seed geography
        seed_incidence = incidence_df[incidence_df[seed_geography] == seed_id]

        balanced_seed_weights = \
            seed_weights_df.loc[seed_weights_df[seed_geography] == seed_id, 'balanced_weight']

        trace_label = "%s_%s" % (seed_geography, seed_id)

        integer_weights, status = do_integerizing(
            trace_label=trace_label,
            control_spec=control_spec,
            control_totals=seed_controls_df.loc[seed_id],
            incidence_table=seed_incidence[control_cols],
            float_weights=balanced_seed_weights,
            total_hh_control_col=total_hh_control_col
        )

        weight_list.append(integer_weights)

    # bulk concat all seed level results
    integer_seed_weights = pd.concat(weight_list)

    inject.add_column(weight_table_name(seed_geography), 'integer_weight', integer_seed_weights)
Example 6
def cdap_simulate(persons_merged, cdap_settings, cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions, chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.
    """

    persons_df = persons_merged.to_frame()

    constants = config.get_model_constants(cdap_settings)

    logger.info("Running cdap_simulate with %d persons" %
                len(persons_df.index))

    choices = run_cdap(
        persons=persons_df,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label='cdap')

    tracing.print_summary('cdap_activity',
                          choices.cdap_activity,
                          value_counts=True)

    print(pd.crosstab(persons_df.ptype, choices.cdap_activity, margins=True))

    choices = choices.reindex(persons_merged.index)
    inject.add_column("persons", "cdap_activity", choices.cdap_activity)
    inject.add_column("persons", "cdap_rank", choices.cdap_rank)

    pipeline.add_dependent_columns("persons", "persons_cdap")
    pipeline.add_dependent_columns("households", "households_cdap")

    if trace_hh_id:

        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
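The crosstab printed above is a quick diagnostic of activity pattern by person type. A minimal sketch of what pd.crosstab with margins produces, on toy data:

import pandas as pd

ptype = pd.Series(['full', 'full', 'part', 'student'], name='ptype')
activity = pd.Series(['M', 'N', 'M', 'H'], name='cdap_activity')

# counts of each (person type, activity) pair, with 'All' row/column totals
print(pd.crosstab(ptype, activity, margins=True))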
Example 7
def school_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                            school_location_sample, configs_dir, chunk_size,
                            trace_hh_id):
    """
    add logsum column to existing school_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in school_location_sample, and computing the logsum of all the utilities

    +-------+--------------+----------------+------------+----------------+
    | PERID | dest_TAZ     | rand           | pick_count | logsum (added) |
    +=======+==============+================+============+================+
    | 23750 | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-------+--------------+----------------+------------+----------------+
    | 23750 | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-------+--------------+----------------+------------+----------------+
    | ...   |              |                |            |                |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-------+--------------+----------------+------------+----------------+
    """

    trace_label = 'school_location_logsums'

    school_location_settings = config.read_model_settings(
        configs_dir, 'school_location.yaml')

    alt_col_name = school_location_settings["ALT_COL_NAME"]
    chooser_col_name = 'TAZ'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()

    logger.info("Running school_location_sample with %s rows" %
                len(school_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')

    logsums_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        logsums_spec = mode_choice_logsums_spec(configs_dir, school_type)

        choosers = school_location_sample[
            school_location_sample['school_type'] == school_type]

        choosers = pd.merge(choosers,
                            persons_merged,
                            left_index=True,
                            right_index=True,
                            how="left")

        choosers['in_period'] = skim_time_period_label(
            school_location_settings['IN_PERIOD'])
        choosers['out_period'] = skim_time_period_label(
            school_location_settings['OUT_PERIOD'])

        # FIXME - should do this in expression file?
        choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                            choosers[alt_col_name])
        choosers['dest_density_index'] = reindex(land_use.density_index,
                                                 choosers[alt_col_name])

        tracing.dump_df(DUMP, choosers,
                        tracing.extend_trace_label(trace_label, school_type),
                        'choosers')

        logsums = compute_logsums(
            choosers, logsums_spec, logsum_settings, skim_dict, skim_stack,
            chooser_col_name, alt_col_name, chunk_size, trace_hh_id,
            tracing.extend_trace_label(trace_label, school_type))

        logsums_list.append(logsums)

    logsums = pd.concat(logsums_list)

    # add_column series should have an index matching the table to which it is being added
    # logsums does, since school_location_sample was on left side of merge creating choosers
    inject.add_column("school_location_sample", "mode_choice_logsum", logsums)
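The logsum added above is, per the docstring, the log of the summed exponentiated mode utilities. A minimal sketch of that quantity, computed stably on toy utilities (compute_logsums itself does this across the whole mode choice spec):

import numpy as np

def logsum(utilities):
    # log(sum(exp(u))) with the max factored out for numerical stability
    u = np.asarray(utilities, dtype=float)
    u_max = u.max()
    return u_max + np.log(np.exp(u - u_max).sum())

# toy mode utilities for one (person, dest_TAZ) pair
print(logsum([-1.2, 0.4, -0.7]))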
Example 8
def compute_accessibility(settings, accessibility_spec, accessibility_settings,
                          skim_dict, omx_file, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    for each destination zone are next summed over each origin zone, and the logarithm of the
    sum mutes large differences.  The decay function on the walk accessibility measure is
    steeper than for automobile or transit.  The minimum accessibility is zero.
    """

    logger.info("Running compute_accessibility")

    constants = config.get_model_constants(accessibility_settings)
    land_use_columns = accessibility_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    zone_count = len(land_use_df.index)

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(land_use_df.index), zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, omx_file, zone_count),
        'skim_do': AccessibilitySkims(skim_dict, omx_file, zone_count,
                                      transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(accessibility_spec, od_df, locals_d, trace_rows=trace_od_rows)
    accessibility_df = pd.DataFrame(index=land_use.index)
    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (zone_count, zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

        inject.add_column("accessibility", column, accessibility_df[column])

    if trace_od:

        if not trace_od_rows.any():
            logger.warn("trace_od not found origin = %s, dest = %s" %
                        (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            # note that this is not the same as the orca-injected accessibility table
            # FIXME - should we name this differently and also dump the updated accessibility table?
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")
Example 9
def trip_mode_choice_simulate(trips_merged, trip_mode_choice_spec,
                              trip_mode_choice_settings, skim_dict, skim_stack,
                              trace_hh_id):
    """
    Trip mode choice simulate
    """

    trips = trips_merged.to_frame()

    nest_spec = config.get_logit_model_settings(trip_mode_choice_settings)
    constants = config.get_model_constants(trip_mode_choice_settings)

    logger.info("Running trip_mode_choice_simulate with %d trips" % len(trips))

    print "\ntrips.columns\n", trips.columns

    odt_skim_stack_wrapper = skim_stack.wrap(left_key='OTAZ',
                                             right_key='DTAZ',
                                             skim_key="start_period")

    od_skims = skim_dict.wrap('OTAZ', 'DTAZ')

    choices_list = []

    # loop by tour_type in order to easily query the expression coefficient file
    for tour_type, segment in trips.groupby('tour_type'):

        logger.info("running %s tour_type '%s'" % (
            len(segment.index),
            tour_type,
        ))

        # name index so tracing knows how to slice
        segment.index.name = 'trip_id'

        # FIXME - check that destination is not null

        trace_label = trace_hh_id and ('trip_mode_choice_%s' % tour_type)

        choices = _mode_choice_simulate(
            segment,
            odt_skim_stack_wrapper=odt_skim_stack_wrapper,
            dot_skim_stack_wrapper=None,
            od_skim_stack_wrapper=od_skims,
            spec=get_segment_and_unstack(trip_mode_choice_spec, tour_type),
            constants=constants,
            nest_spec=nest_spec,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice')

        # FIXME - no point in printing verbose value_counts now that we have tracing?
        tracing.print_summary('trip_mode_choice_simulate %s choices' %
                              tour_type,
                              choices,
                              value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        mem = memory_info()
        logger.debug('memory_info tour_type %s, %s' % (tour_type, mem))

    choices = pd.concat(choices_list)

    tracing.print_summary('trip_mode_choice_simulate all tour type choices',
                          choices,
                          value_counts=True)

    # FIXME - is this a NOP if trips table doesn't exist
    inject.add_column("trips", "trip_mode", choices)

    if trace_hh_id:

        tracing.trace_df(inject.get_table('trips').to_frame(),
                         label="trip_mode",
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    # FIXME - this forces garbage collection
    memory_info()
Example 10
def tour_mode_choice_simulate(tours_merged, tour_mode_choice_spec,
                              tour_mode_choice_settings, skim_dict, skim_stack,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """

    trace_label = trace_hh_id and 'tour_mode_choice'

    tours = tours_merged.to_frame()

    tours = tours[tours.tour_category != 'subtour']

    nest_spec = config.get_logit_model_settings(tour_mode_choice_settings)
    constants = config.get_model_constants(tour_mode_choice_settings)

    logger.info("Running tour_mode_choice_simulate with %d tours" %
                len(tours.index))

    tracing.print_summary('tour_mode_choice_simulate tour_type',
                          tours.tour_type,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tour_mode_choice_spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE',
                         transpose=False)

    # setup skim keys
    odt_skim_stack_wrapper = skim_stack.wrap(left_key='TAZ',
                                             right_key='destination',
                                             skim_key="out_period")
    dot_skim_stack_wrapper = skim_stack.wrap(left_key='destination',
                                             right_key='TAZ',
                                             skim_key="in_period")
    od_skims = skim_dict.wrap('TAZ', 'destination')

    choices_list = []

    for tour_type, segment in tours.groupby('tour_type'):

        # if tour_type != 'work':
        #     continue

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" % (
            tour_type,
            len(segment.index),
        ))

        # name index so tracing knows how to slice
        segment.index.name = 'tour_id'

        spec = get_segment_and_unstack(tour_mode_choice_spec, tour_type)

        if trace_hh_id:
            tracing.trace_df(spec,
                             tracing.extend_trace_label(
                                 trace_label, 'spec.%s' % tour_type),
                             slicer='NONE',
                             transpose=False)

        choices = _mode_choice_simulate(
            segment,
            odt_skim_stack_wrapper=odt_skim_stack_wrapper,
            dot_skim_stack_wrapper=dot_skim_stack_wrapper,
            od_skim_stack_wrapper=od_skims,
            spec=spec,
            constants=constants,
            nest_spec=nest_spec,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' %
                              tour_type,
                              choices,
                              value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        mem = memory_info()
        logger.debug('memory_info tour_type %s, %s' % (tour_type, mem))

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices,
                          value_counts=True)

    inject.add_column("tours", "mode", choices)

    if trace_hh_id:
        trace_columns = ['mode', 'person_id', 'tour_type', 'tour_num']
        tracing.trace_df(inject.get_table('tours').to_frame(),
                         label=tracing.extend_trace_label(trace_label, 'mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         columns=trace_columns,
                         warn_if_empty=True)

    # FIXME - this forces garbage collection
    memory_info()
Example 11
def workplace_location_simulate(persons_merged, workplace_location_sample,
                                workplace_location_spec,
                                workplace_location_settings, skim_dict,
                                destination_size_terms, chunk_size,
                                trace_hh_id):
    """
    Workplace location model on workplace_location_sample annotated with mode_choice logsum
    to select a work_taz from sample alternatives
    """

    # for now we generate a workplace location for everyone -
    # presumably it will not get used in downstream models for everyone -
    # whether it gets used should depend on CDAP and mandatory tour generation
    choosers = persons_merged.to_frame()

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    workplace_location_sample = workplace_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()
    alternatives = \
        pd.merge(workplace_location_sample, destination_size_terms,
                 left_on=alt_col_name, right_index=True, how="left")

    tracing.dump_df(DUMP, alternatives, 'workplace_location_simulate',
                    'alternatives')

    constants = config.get_model_constants(workplace_location_settings)

    sample_pool_size = len(destination_size_terms.index)

    logger.info("Running workplace_location_simulate with %d persons" %
                len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {'skims': skims, 'sample_pool_size': float(sample_pool_size)}
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    tracing.dump_df(DUMP, choosers, 'workplace_location_simulate', 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=workplace_location_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_hh_id and 'workplace_location',
        trace_choice_name='workplace_location')

    # FIXME - no need to reindex since we didn't slice choosers
    # choices = choices.reindex(persons_merged.index)

    tracing.print_summary('workplace_taz', choices, describe=True)

    inject.add_column("persons", "workplace_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_workplace")

    if trace_hh_id:
        trace_columns = ['workplace_taz'] + \
            inject.get_table('persons_workplace').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="workplace_location",
                         columns=trace_columns,
                         warn_if_empty=True)
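The "skims" made available to @ expressions are origin-destination matrix lookups keyed by chooser columns. A minimal sketch of the idea with a toy distance matrix and a hypothetical wrapper class (ToySkim is illustrative only, not the activitysim SkimDict API):

import numpy as np
import pandas as pd

class ToySkim(object):
    """Look up matrix values for the (orig, dest) columns of a chooser frame."""
    def __init__(self, matrix, orig_col, dest_col):
        self.matrix = matrix
        self.orig_col = orig_col
        self.dest_col = dest_col

    def lookup(self, df):
        return self.matrix[df[self.orig_col].values, df[self.dest_col].values]

distance = np.array([[0., 5., 9.],
                     [5., 0., 4.],
                     [9., 4., 0.]])

choosers = pd.DataFrame({'TAZ': [0, 1, 2], 'dest_TAZ': [2, 0, 1]})
skims = ToySkim(distance, 'TAZ', 'dest_TAZ')
print(skims.lookup(choosers))  # [9. 5. 4.]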
Example 12
def workplace_location_logsums(persons_merged, land_use, skim_dict, skim_stack,
                               workplace_location_sample,
                               workplace_location_settings, configs_dir,
                               chunk_size, trace_hh_id):
    """
    add logsum column to existing workplace_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in workplace_location_sample, and computing the logsum of all the utilities

    +-------+--------------+----------------+------------+----------------+
    | PERID | dest_TAZ     | rand           | pick_count | logsum (added) |
    +=======+==============+================+============+================+
    | 23750 | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-------+--------------+----------------+------------+----------------+
    | 23750 | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-------+--------------+----------------+------------+----------------+
    | ...   |              |                |            |                |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-------+--------------+----------------+------------+----------------+
    """

    trace_label = 'workplace_location_logsums'

    logsums_spec = mode_choice_logsums_spec(configs_dir, 'work')

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]
    chooser_col_name = 'TAZ'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir,
                                                 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    workplace_location_sample = workplace_location_sample.to_frame()

    logger.info("Running workplace_location_sample with %s rows" %
                len(workplace_location_sample))

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = workplace_location_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    choosers = pd.merge(workplace_location_sample,
                        persons_merged,
                        left_index=True,
                        right_index=True,
                        how="left")

    choosers['in_period'] = skim_time_period_label(
        workplace_location_settings['IN_PERIOD'])
    choosers['out_period'] = skim_time_period_label(
        workplace_location_settings['OUT_PERIOD'])

    # FIXME - should do this in expression file?
    choosers['dest_topology'] = reindex(land_use.TOPOLOGY,
                                        choosers[alt_col_name])
    choosers['dest_density_index'] = reindex(land_use.density_index,
                                             choosers[alt_col_name])

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = compute_logsums(choosers, logsums_spec, logsum_settings,
                              skim_dict, skim_stack, chooser_col_name,
                              alt_col_name, chunk_size, trace_hh_id,
                              trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved
    # logsums now does, since workplace_location_sample was on left side of merge de-dup merge
    inject.add_column("workplace_location_sample", "mode_choice_logsum",
                      logsums)
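The comment above is about a real pandas subtlety: column assignment normally aligns on index, but with duplicate index labels alignment is ambiguous, so pandas only accepts a series whose index exactly matches the table's. A minimal sketch on a toy frame (not the pipeline tables; the exact error message varies by pandas version):

import pandas as pd

# sample table with duplicate index labels (one row per sampled dest_TAZ)
sample = pd.DataFrame({'dest_TAZ': [14, 16, 12]},
                      index=pd.Index([23750, 23750, 23751], name='PERID'))

# a series with the identical index assigns positionally and succeeds
logsums = pd.Series([1.85, 1.92, 2.40], index=sample.index)
sample['mode_choice_logsum'] = logsums

# a reordered duplicate index cannot be aligned and raises ValueError
try:
    sample['bad'] = pd.Series([0., 0., 0.],
                              index=pd.Index([23751, 23750, 23750], name='PERID'))
except ValueError as e:
    print(e)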
Example 13
def final_seed_balancing(settings, crosswalk, control_spec, incidence_table):
    """
    Balance the household weights for each of the seed geographies (independently)
    using the seed-level controls and the aggregated sub-zone control totals.

    Create the seed_weights table with one row per household and columns containing
    household_id, seed geography (e.g. PUMA), and float preliminary_balanced_weights

    Adds a balanced_weight column to the seed_weights table

    Parameters
    ----------
    settings : dict (settings.yaml as dict)
    crosswalk : pipeline table
    control_spec : pipeline table
    incidence_table : pipeline table

    Returns
    -------

    """

    crosswalk_df = crosswalk.to_frame()
    incidence_df = incidence_table.to_frame()
    control_spec = control_spec.to_frame()

    seed_geography = settings.get('seed_geography')
    seed_weight_table_name = weight_table_name(seed_geography)

    # if there are no meta controls, then balanced_weight is simply preliminary_balanced_weight
    geographies = settings['geographies']
    if not (control_spec.geography == geographies[0]).any():
        logger.warning("no need for final_seed_balancing because no meta controls")
        seed_weights_df = get_weight_table(seed_geography)
        if 'balanced_weight' not in seed_weights_df:
            final_seed_weights = seed_weights_df['preliminary_balanced_weight']
            inject.add_column(seed_weight_table_name, 'balanced_weight', final_seed_weights)
        return

    # we use all control_spec rows, so no need to filter on geography as for initial_seed_balancing
    seed_controls_df = get_control_table(seed_geography)
    assert (seed_controls_df.columns == control_spec.target).all()

    # determine master_control_index if specified in settings
    total_hh_control_col = setting('total_hh_control')

    max_expansion_factor = settings.get('max_expansion_factor', None)
    min_expansion_factor = settings.get('min_expansion_factor', None)
    absolute_upper_bound = settings.get('absolute_upper_bound', None)
    absolute_lower_bound = settings.get('absolute_lower_bound', None)

    relaxation_factors = pd.DataFrame(index=seed_controls_df.columns.tolist())

    # run balancer for each seed geography
    weight_list = []

    seed_ids = crosswalk_df[seed_geography].unique()
    for seed_id in seed_ids:

        logger.info("final_seed_balancing seed id %s" % seed_id)

        seed_incidence_df = incidence_df[incidence_df[seed_geography] == seed_id]

        status, weights_df, controls_df = do_balancing(
            control_spec=control_spec,
            total_hh_control_col=total_hh_control_col,
            max_expansion_factor=max_expansion_factor,
            min_expansion_factor=min_expansion_factor,
            absolute_lower_bound=absolute_lower_bound,
            absolute_upper_bound=absolute_upper_bound,
            incidence_df=seed_incidence_df,
            control_totals=seed_controls_df.loc[seed_id],
            initial_weights=seed_incidence_df['sample_weight'])

        logger.info("seed_balancer status: %s" % status)
        if not status['converged']:
            raise RuntimeError("final_seed_balancing for seed_id %s did not converge" % seed_id)

        weight_list.append(weights_df['final'])

        relaxation_factors[seed_id] = controls_df['relaxation_factor']

    # bulk concat all seed level results
    final_seed_weights = pd.concat(weight_list)

    inject.add_column(seed_weight_table_name, 'balanced_weight', final_seed_weights)
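do_balancing is populationsim's list balancer; as a rough intuition for what "balancing household weights to controls" means, here is a minimal IPU-style sketch on toy data (the real balancer also handles control importance, relaxation factors, and expansion bounds):

import numpy as np

# toy data: 4 households, 2 controls (e.g. total households, total workers)
incidence = np.array([[1., 0.],
                      [1., 1.],
                      [1., 2.],
                      [1., 1.]])
controls = np.array([100., 120.])
weights = np.full(4, 25.)  # initial weights

for _ in range(100):
    for c in range(len(controls)):
        weighted_sum = (weights * incidence[:, c]).sum()
        if weighted_sum > 0:
            factor = controls[c] / weighted_sum
            # scale only the households that contribute to this control
            weights = np.where(incidence[:, c] > 0, weights * factor, weights)

print(weights.round(2))                    # close to [10. 30. 30. 30.]
print(incidence.T.dot(weights).round(2))   # column sums approach the controls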
Example 14
def non_mandatory_tour_frequency(persons_merged,
                                 non_mandatory_tour_frequency_alts,
                                 non_mandatory_tour_frequency_spec,
                                 non_mandatory_tour_frequency_settings,
                                 chunk_size,
                                 trace_hh_id):

    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combinations.
    """

    t0 = print_elapsed_time()

    choosers = persons_merged.to_frame()

    non_mandatory_tour_frequency_alts['tot_tours'] = non_mandatory_tour_frequency_alts.sum(axis=1)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons" % len(choosers))

    constants = config.get_model_constants(non_mandatory_tour_frequency_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for name, segment in choosers.groupby('ptype_cat'):

        logger.info("Running segment '%s' of size %d" % (name, len(segment)))

        choices = interaction_simulate(
            segment,
            non_mandatory_tour_frequency_alts,
            # notice that we pick the column for the segment for each segment we run
            spec=non_mandatory_tour_frequency_spec[[name]],
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        t0 = print_elapsed_time("non_mandatory_tour_frequency.%s" % name, t0, debug=True)

        # FIXME - force garbage collection
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('non_mandatory_tour_frequency', choices, value_counts=True)

    # FIXME - no need to reindex?
    # FIXME - how about the persons not processed
    inject.add_column("persons", "non_mandatory_tour_frequency", choices)

    create_non_mandatory_tours(trace_hh_id)

    # add non_mandatory_tour-dependent columns (e.g. tour counts) to persons
    pipeline.add_dependent_columns("persons", "persons_nmtf")

    if trace_hh_id:
        trace_columns = ['non_mandatory_tour_frequency']
        tracing.trace_df(inject.get_table('persons').to_frame(),
                         label="non_mandatory_tour_frequency.persons",
                         # columns=trace_columns,
                         warn_if_empty=True)
Example 15
def school_location_simulate(persons_merged, school_location_sample,
                             school_location_spec, school_location_settings,
                             skim_dict, destination_size_terms, chunk_size,
                             trace_hh_id):
    """
    School location model on school_location_sample annotated with mode_choice logsum
    to select a school_taz from sample alternatives
    """

    choosers = persons_merged.to_frame()
    school_location_sample = school_location_sample.to_frame()
    destination_size_terms = destination_size_terms.to_frame()

    trace_label = 'school_location_simulate'
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    constants = config.get_model_constants(school_location_settings)

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]
    tracing.dump_df(DUMP, choosers, 'school_location_simulate', 'choosers')

    choices_list = []
    for school_type in ['university', 'highschool', 'gradeschool']:

        locals_d['segment'] = school_type

        choosers_segment = choosers[choosers["is_" + school_type]]
        alts_segment = school_location_sample[
            school_location_sample['school_type'] == school_type]

        # alternatives are pre-sampled and annotated with logsums and pick_count
        # but we have to merge additional alt columns into alt sample list
        alts_segment = \
            pd.merge(alts_segment, destination_size_terms,
                     left_on=alt_col_name, right_index=True, how="left")

        tracing.dump_df(DUMP, alts_segment, trace_label,
                        '%s_alternatives' % school_type)

        choices = interaction_sample_simulate(
            choosers_segment,
            alts_segment,
            spec=school_location_spec[[school_type]],
            choice_column=alt_col_name,
            skims=skims,
            locals_d=locals_d,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, school_type),
            trace_choice_name='school_location')

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    choices = choices.reindex(persons_merged.index).fillna(-1).astype(int)

    tracing.dump_df(DUMP, choices, trace_label, 'choices')

    tracing.print_summary('school_taz', choices, describe=True)

    inject.add_column("persons", "school_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        trace_columns = ['school_taz'] + \
            inject.get_table('persons_school').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
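The reindex/fillna step above is the standard pandas idiom for backfilling choosers who were filtered out before the model ran. A minimal sketch on a toy series:

import pandas as pd

all_persons = pd.Index([1, 2, 3, 4], name='person_id')

# choices were only made for the persons who attend school
choices = pd.Series({2: 35, 4: 17})

# expand to every person, coding non-students as -1
choices = choices.reindex(all_persons).fillna(-1).astype(int)
print(choices)  # persons 1 and 3 get -1, persons 2 and 4 keep their school TAZs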
Example 16
def final_seed_balancing(settings, crosswalk, control_spec, incidence_table):

    crosswalk_df = crosswalk.to_frame()
    incidence_df = incidence_table.to_frame()
    control_spec = control_spec.to_frame()

    seed_geography = settings.get('seed_geography')
    seed_weight_table_name = weight_table_name(seed_geography)

    # if there are no meta controls, then balanced_weight is simply preliminary_balanced_weight
    geographies = settings['geographies']
    if not (control_spec.geography == geographies[0]).any():
        logger.warning(
            "no need for final_seed_balancing because no meta controls")
        seed_weights_df = get_weight_table(seed_geography)
        if 'balanced_weight' not in seed_weights_df:
            final_seed_weights = seed_weights_df['preliminary_balanced_weight']
            inject.add_column(seed_weight_table_name, 'balanced_weight',
                              final_seed_weights)
        return

    # we use all control_spec rows, so no need to filter on geography as for initial_seed_balancing
    seed_controls_df = get_control_table(seed_geography)
    assert (seed_controls_df.columns == control_spec.target).all()

    # determine master_control_index if specified in settings
    total_hh_control_col = settings.get('total_hh_control')

    max_expansion_factor = settings.get('max_expansion_factor', None)

    relaxation_factors = pd.DataFrame(index=seed_controls_df.columns.tolist())

    # run balancer for each seed geography
    weight_list = []

    seed_ids = crosswalk_df[seed_geography].unique()
    for seed_id in seed_ids:

        logger.info("initial_seed_balancing seed id %s" % seed_id)

        seed_incidence_df = incidence_df[incidence_df[seed_geography] ==
                                         seed_id]

        status, weights_df, controls_df = do_balancing(
            control_spec=control_spec,
            total_hh_control_col=total_hh_control_col,
            max_expansion_factor=max_expansion_factor,
            incidence_df=seed_incidence_df,
            control_totals=seed_controls_df.loc[seed_id],
            initial_weights=seed_incidence_df['sample_weight'])

        logger.info("seed_balancer status: %s" % status)
        if not status['converged']:
            raise RuntimeError(
                "final_seed_balancing for seed_id %s did not converge" %
                seed_id)

        weight_list.append(weights_df['final'])

        relaxation_factors[seed_id] = controls_df['relaxation_factor']

    # bulk concat all seed level results
    final_seed_weights = pd.concat(weight_list)

    inject.add_column(seed_weight_table_name, 'balanced_weight',
                      final_seed_weights)
Example 17
def integerize_final_seed_weights(settings, crosswalk, control_spec,
                                  incidence_table):
    """
    Integerize the final balanced weights for each seed (PUMA) zone, which were balanced
    with aggregated low and mid-level controls and distributed meta-level controls.

    Adds integer_weight column to seed-level weight table

    Parameters
    ----------
    settings : dict (settings.yaml as dict)
    crosswalk : pipeline table
    control_spec : pipeline table
    incidence_table : pipeline table

    Returns
    -------

    """

    if setting('NO_INTEGERIZATION_EVER', False):
        logger.warning(
            "skipping integerize_final_seed_weights: NO_INTEGERIZATION_EVER")
        return

    crosswalk_df = crosswalk.to_frame()
    incidence_df = incidence_table.to_frame()
    control_spec = control_spec.to_frame()

    seed_geography = settings.get('seed_geography')
    seed_controls_df = get_control_table(seed_geography)

    seed_weights_df = get_weight_table(seed_geography)

    # FIXME - I assume we want to integerize using meta controls too?
    control_cols = control_spec.target
    assert (seed_controls_df.columns == control_cols).all()

    # determine master_control_index if specified in settings
    total_hh_control_col = setting('total_hh_control')

    # run balancer for each seed geography
    weight_list = []

    seed_ids = crosswalk_df[seed_geography].unique()
    for seed_id in seed_ids:

        logger.info("integerize_final_seed_weights seed id %s" % seed_id)

        # slice incidence rows for this seed geography
        seed_incidence = incidence_df[incidence_df[seed_geography] == seed_id]

        balanced_seed_weights = \
            seed_weights_df.loc[seed_weights_df[seed_geography] == seed_id, 'balanced_weight']

        trace_label = "%s_%s" % (seed_geography, seed_id)

        integer_weights, status = do_integerizing(
            trace_label=trace_label,
            control_spec=control_spec,
            control_totals=seed_controls_df.loc[seed_id],
            incidence_table=seed_incidence[control_cols],
            float_weights=balanced_seed_weights,
            total_hh_control_col=total_hh_control_col)

        weight_list.append(integer_weights)

    # bulk concat all seed level results
    integer_seed_weights = pd.concat(weight_list)

    inject.add_column(weight_table_name(seed_geography), 'integer_weight',
                      integer_seed_weights)
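do_integerizing is populationsim's LP/rounding-based integerizer; as a rough illustration of the problem it solves, here is a much simpler total-preserving rounding sketch (largest-remainder method over toy float weights, not the actual algorithm):

import numpy as np

def round_preserving_total(float_weights):
    """Round floats to ints whose sum equals the rounded total (largest remainders)."""
    floors = np.floor(float_weights).astype(int)
    shortfall = int(round(float_weights.sum())) - floors.sum()
    # give the +1s to the weights with the largest fractional parts
    order = np.argsort(-(float_weights - floors))
    result = floors.copy()
    result[order[:shortfall]] += 1
    return result

weights = np.array([1.9, 0.4, 2.3, 1.4])   # sums to 6.0
print(round_preserving_total(weights))      # [2 1 2 1], sums to 6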