Example 1
def create_od_table(od_index, spec, locals_dict, trace_od):
    """Assign variables with ActivitySim's assign and register output to pipeline

    Parameters
    ----------
    od_index : pandas MultiIndex
    spec : pandas DataFrame
        assignment expressions
    locals_dict : dict
        dictionary containing constants and zone matrices
    trace_od : list or dict
        origin-destination pair

    Returns
    -------
    od_table : pandas DataFrame
        all origin-destination pairs
    """

    logger.info('creating OD table ...')

    od_df = od_index.to_frame(index=False)
    trace_rows = trace.trace_filter(od_df, trace_od)
    od_table, trace_results, _ = assign.assign_variables(
        spec, od_df, locals_dict=locals_dict, trace_rows=trace_rows)

    if trace_results is not None:
        tracing.write_csv(trace_results, file_name='od_table', transpose=False)

    od_table.set_index(od_index, inplace=True)

    logger.info('registering OD table to pipeline ...')
    pipeline.replace_table('od_table', od_table)
    create_zone_summary(od_table.reset_index())

    return od_table
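
A minimal, hypothetical sketch (not taken from the source code) of the spec-plus-locals pattern these examples share: the assignment spec is a table with target and expression columns, each expression is evaluated against the chooser dataframe, and anything in locals_dict is visible to the expressions. The targets, columns and constant below are made up for illustration.

import pandas as pd
from activitysim.core import assign

spec = pd.DataFrame({
    'description': ['round trip distance', 'distance-based cost'],
    'target': ['round_trip_dist', 'dist_cost'],
    'expression': ['df.dist_to_dest * 2', 'round_trip_dist * COST_PER_MILE'],
})
od_df = pd.DataFrame({'orig': [1, 1, 2], 'dest': [1, 2, 2],
                      'dist_to_dest': [0.0, 3.5, 4.1]})

# results has one column per target, indexed like od_df;
# previously assigned targets (round_trip_dist) are visible to later expressions
results, trace_results, trace_assigned_locals = assign.assign_variables(
    spec, od_df, locals_dict={'COST_PER_MILE': 0.6}, trace_rows=None)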
Example 2
def aggregate_zone_processor(zones, trace_od):
    """
    zones: orca table

    zone data for base and build scenario dat files combined into a single dataframe
    with column names prefixed with base_ or build_, indexed by ZONE
    """

    trace_label = 'aggregate_zone'
    model_settings = config.read_model_settings('aggregate_zone.yaml')
    spec_file_name = model_settings.get('spec_file_name', 'aggregate_zone.csv')
    aggregate_zone_spec = bca.read_assignment_spec(spec_file_name)

    zones_df = zones.to_frame()

    logger.info("Running aggregate_zone_processor with %d zones" %
                (len(zones_df.index), ))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (zones_df.index == trace_orig) | (zones_df.index
                                                          == trace_dest)
    else:
        trace_od_rows = None

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(model_settings)
    locals_dict.update(config.setting('globals'))

    # eval_variables evaluates each of the expressions in spec
    # in the context of each row of the choosers dataframe
    results, trace_results, trace_assigned_locals = \
        assign.assign_variables(aggregate_zone_spec,
                                zones_df,
                                locals_dict,
                                df_alias='zones',
                                trace_rows=trace_od_rows)

    pipeline.replace_table('aggregate_zone_summary', results)

    if trace_results is not None:

        tracing.write_csv(trace_results,
                          file_name="aggregate_zone",
                          index_label='zone',
                          column_labels=['label', 'zone'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="aggregate_zone_locals")
Example 3
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, trace_label, trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        model_constants = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.CONSTANTS')
        tap_tap_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

        with memo("#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
            transit_df = self.all_transit_paths(access_df, egress_df,
                                                chooser_attributes,
                                                trace_label, trace)
            # note: transit_df index is arbitrary
            chunk.log_df(trace_label, "transit_df", transit_df)

        locals_d = {'los': self.network_los}
        locals_d.update(model_constants)

        assignment_spec = assign.read_assignment_spec(
            file_name=config.config_file_path(tap_tap_settings['SPEC']))

        results, _, _ = assign.assign_variables(assignment_spec, transit_df,
                                                locals_d)
        assert len(results.columns) == 1
        transit_df['transit'] = results

        # filter out unavailable btap_atap pairs
        logger.debug(
            f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
        )
        transit_df = transit_df[transit_df.transit > 0]

        transit_df.drop(columns=chooser_attributes.columns, inplace=True)

        chunk.log_df(trace_label, "transit_df", None)

        if trace:
            self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
Example 4
def aggregate_demographics_processor(zone_hhs, aggregate_demographics_spec,
                                     settings, trace_od):
    """

    Parameters
    ----------
    zone_hhs : orca table
        input zone demographics

    """

    trace_label = 'aggregate_demographics'
    model_settings = config.read_model_settings('aggregate_demographics.yaml')

    zone_hhs_df = zone_hhs.to_frame()

    logger.info("Running %s with %d zones" % (
        trace_label,
        len(zone_hhs_df),
    ))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (zone_hhs_df.index == trace_orig) | (zone_hhs_df.index
                                                             == trace_dest)
    else:
        trace_od_rows = None

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(model_settings)
    locals_dict.update(config.setting('globals'))

    trace_rows = None

    # eval_variables evaluates each of the expressions in spec
    # in the context of each row of the choosers dataframe
    results, trace_results, trace_assigned_locals = \
        assign.assign_variables(aggregate_demographics_spec,
                                zone_hhs_df,
                                locals_dict,
                                df_alias='hhs',
                                trace_rows=trace_od_rows)

    pipeline.replace_table("zone_demographics", results)

    # expression file can use silos column to designate result targets (e.g. count of households)
    add_aggregate_results(results,
                          aggregate_demographics_spec,
                          source=trace_label)

    if trace_results is not None:

        tracing.write_csv(trace_results,
                          file_name="aggregate_demographics",
                          index_label='zone',
                          column_labels=['label', 'zone'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="aggregate_demographics_locals")
Example 5
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):

    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    # #bug
    #
    # land_use_df = land_use_df[land_use_df.index % 2 == 1]
    # accessibility_df = accessibility_df[accessibility_df.index.isin(land_use_df.index)].head(5)
    #
    # print "land_use_df", land_use_df.index
    # print "accessibility_df", accessibility_df.index
    # #bug

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
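
The docstring describes, for each origin zone i, something like A_i = log(1 + sum over destinations j of E_j * f(t_ij)), where E_j is a destination employment variable and f is a mode-specific decay function; the loop over results.columns performs the reshape-sum-log part. A purely illustrative numpy sketch of that step (the decay rate and variable names are assumptions, not taken from accessibility.csv):

import numpy as np

orig_zone_count, dest_zone_count = 4, 4
round_trip_time = np.random.uniform(10, 90, size=(orig_zone_count, dest_zone_count))
employment = np.random.uniform(0, 1000, size=dest_zone_count)

decay = np.exp(-0.05 * round_trip_time)           # assumed mode-specific decay function
weighted = employment * decay                     # one row per origin, one column per destination
accessibility = np.log(weighted.sum(axis=1) + 1)  # same form as np.log(np.sum(data, axis=1) + 1)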
Example 6
def best_transit_path(set_random_seed,
                      network_los,
                      best_transit_path_spec):

    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    omaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    dmaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    tod = np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE)
    od_df = pd.DataFrame({'omaz': omaz, 'dmaz': dmaz, 'tod': tod})

    trace_od = (od_df.omaz[0], od_df.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s" % trace_od)

    # build exploded atap_btap_df

    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    atap_btap_df = network_los.get_tappairs_mazpairs(od_df.omaz, od_df.dmaz,
                                                     ofilter='drive_time',
                                                     dfilter='walk_alightingActual')

    # add in tod column
    atap_btap_df = atap_btap_df.merge(
        right=od_df[['tod']],
        left_on='idx',
        right_index=True,
        how='left'
    )

    logger.info("len od_df %s", len(od_df.index))
    logger.info("len atap_btap_df %s", len(atap_btap_df.index))
    logger.info("avg explosion %s", (len(atap_btap_df.index) / (1.0 * len(od_df.index))))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_oabd_rows = (atap_btap_df.omaz == trace_orig) & (atap_btap_df.dmaz == trace_dest)
    else:
        trace_oabd_rows = None

    constants = config.get_model_constants(model_settings)
    locals_d = {
        'np': np,
        'network_los': network_los
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(best_transit_path_spec, atap_btap_df, locals_d,
                                  trace_rows=trace_oabd_rows)

    # copy results
    for column in results.columns:
        atap_btap_df[column] = results[column]

    # drop rows if no utility
    n = len(atap_btap_df.index)
    atap_btap_df = atap_btap_df.dropna(subset=['utility'])

    logger.info("Dropped %s of %s rows with null utility", n - len(atap_btap_df.index), n)

    # choose max utility
    atap_btap_df = atap_btap_df.sort_values(by='utility').groupby('idx').tail(1)

    if trace_od:

        if not trace_oabd_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s", trace_orig, trace_dest)
        else:

            tracing.trace_df(atap_btap_df,
                             label='best_transit_path',
                             slicer='NONE',
                             transpose=False)

            tracing.trace_df(trace_results,
                             label='trace_best_transit_path',
                             slicer='NONE',
                             transpose=False)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="trace_best_transit_path_locals")
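
The "choose max utility" step in this example (and in Example 14) keeps the single highest-utility row per idx by sorting ascending on utility and taking the last row of each group. A tiny self-contained illustration of the pattern with made-up data:

import pandas as pd

df = pd.DataFrame({'idx': [0, 0, 1, 1, 1],
                   'utility': [-1.2, -0.4, -2.0, -0.9, -1.5]})

best = df.sort_values(by='utility').groupby('idx').tail(1)
# one row per idx: the row with the maximum utility for that idx
assert set(best.utility) == {-0.4, -0.9}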
Example 7
def compute_accessibility(accessibility, network_los, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(
        config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    logger.info("Running %s with %d dest zones" %
                (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)

    land_use_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()
    land_use_df = land_use_df[land_use_columns]

    # don't assume they are the same: accessibility may be sliced if we are multiprocessing
    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }

    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = TransitVirtualPathBuilder(network_los)

    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)  # (o,d)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    logger.info("{trace_label} added {len(results.columns} columns")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(
                f"trace_od not found origin = {trace_orig}, dest = {trace_dest}"
            )
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")
Example 8
def compute_accessibilities_for_zones(accessibility_df, land_use_df,
                                      assignment_spec, constants, network_los,
                                      trace_od, trace_label):

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d orig zones %d dest zones" %
                (trace_label, orig_zone_count, dest_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()
    chunk.log_df(trace_label, "od_df", od_df)

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }
    locals_d.update(constants)

    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = network_los.tvpb

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d,
                                  trace_rows=trace_od_rows, trace_label=trace_label, chunk_log=True)

    chunk.log_df(trace_label, "results", results)

    # accessibility_df = accessibility_df.copy()
    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)  # (o,d)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(
                f"trace_od not found origin = {trace_orig}, dest = {trace_dest}"
            )
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")

    return accessibility_df
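
The od_df construction used here (and in Examples 5, 7, 15 and 16) pairs np.repeat on origins with np.tile on destinations, so the flat table enumerates every origin-destination pair in row-major order; that is what later allows data.shape = (orig_zone_count, dest_zone_count) to reshape a result column into an (o, d) matrix. A small sketch:

import numpy as np
import pandas as pd

orig_zones = np.array([1, 2])
dest_zones = np.array([10, 20, 30])

od_df = pd.DataFrame({
    'orig': np.repeat(orig_zones, len(dest_zones)),  # 1 1 1 2 2 2
    'dest': np.tile(dest_zones, len(orig_zones)),    # 10 20 30 10 20 30
})

# a flat result column for these rows reshapes cleanly into an (n_orig, n_dest) matrix
values = np.arange(len(od_df), dtype=float)
values.shape = (len(orig_zones), len(dest_zones))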
Example 9
def demographics_processor(persons, persons_merged, demographics_spec,
                           demographics_settings, chunk_size, trace_hh_id):

    # the choice model will be applied to each row of the choosers table (a pandas.DataFrame)
    persons_df = persons_merged.to_frame()

    logger.info(
        "Running demographics_processor with %d persons (chunk size = %s)" %
        (len(persons_df), chunk_size))

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(demographics_settings)
    locals_dict.update(config.setting('globals'))

    trace_rows = trace_hh_id and persons_df['household_id'] == trace_hh_id

    # eval_variables evaluates each of the expressions in spec
    # in the context of each row of the choosers dataframe
    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(demographics_spec,
                                  persons_df,
                                  locals_dict,
                                  df_alias='persons',
                                  trace_rows=trace_rows)

    # add assigned columns to persons as they are needed by downstream processors
    persons = persons.to_frame()
    assign_in_place(persons, results)
    pipeline.replace_table("persons", persons)

    # coc groups with counts
    # TODO - should we allow specifying which assigned columns are coc (e.g. in settings?)
    # for now, assume all assigned columns are coc, but this could cramp modelers style
    # if they want to create additional demographic columns for downstream use that aren't coc
    coc_columns = list(results.columns)

    inject.add_injectable("coc_column_names", coc_columns)

    # - create table with coc columns as indexes and a single column 'persons' with counts
    # index                        persons
    # coc_poverty coc_age
    # False       False            20
    #             True              3
    # True        False             4
    coc_grouped = results.groupby(coc_columns)
    coc_grouped = coc_grouped[coc_columns[0]].count().to_frame(name='persons')

    pipeline.replace_table("coc_results", coc_grouped)

    add_summary_results(coc_grouped)

    if trace_hh_id:

        if trace_results is not None:

            tracing.write_csv(trace_results,
                              file_name="demographics",
                              index_label='person_idx',
                              column_labels=['label', 'person'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="demographics_locals")
Example 10
def eval_and_sum(assignment_expressions,
                 df,
                 locals_dict,
                 group_by_column_names=None,
                 df_alias=None,
                 chunk_size=0,
                 trace_rows=None):
    """
    Evaluate assignment_expressions against df, and sum the results
    (summing by group if a list of group_by_column_names is specified,
    e.g. group by coc column names to return sums by community of concern).

    Parameters
    ----------
    assignment_expressions
    df
    locals_dict
    group_by_column_names : array of str
        list of names of the columns to group by (e.g. coc_column_names of trip_coc_end)
    df_alias : str
        assign_variables df_alias (name of df in assignment_expressions)
    chunk_size : int
    trace_rows : array of bool
        array indicating which rows in df are to be traced

    Returns
    -------

    """

    if group_by_column_names is None:
        group_by_column_names = []

    rows_per_chunk, effective_chunk_size = \
        calc_rows_per_chunk(chunk_size, df, assignment_expressions,
                            extra_columns=len(group_by_column_names),
                            trace_label='eval_and_sum')

    logger.info("eval_and_sum chunk_size %s rows_per_chunk %s df rows %s" %
                (effective_chunk_size, rows_per_chunk, df.shape[0]))

    summary = None
    result_list = []
    trace_results = []
    trace_assigned_locals = {}

    for i, num_chunks, df_chunk, trace_rows_chunk in chunked_df(
            df, rows_per_chunk, trace_rows):

        logger.info("eval_and_sum chunk %s of %s" % (i, num_chunks))

        logger.debug("eval_and_sum chunk %s assign variables" % (i, ))
        assigned_chunk, trace_chunk, trace_assigned_locals_chunk = \
            assign.assign_variables(assignment_expressions,
                                    df_chunk,
                                    locals_dict=locals_dict,
                                    df_alias=df_alias,
                                    trace_rows=trace_rows_chunk)

        # sum this chunk
        logger.debug("eval_and_sum chunk %s sum" % (i, ))
        if group_by_column_names:
            # concat in the group_by columns
            for c in group_by_column_names:
                assigned_chunk[c] = df_chunk[c]
            # sum this chunk
            summary = assigned_chunk.groupby(group_by_column_names).sum()
        else:
            summary = assigned_chunk.sum().to_frame().T

        result_list.append(summary)

        if trace_chunk is not None:
            trace_results.append(trace_chunk)

        if trace_assigned_locals_chunk is not None:
            trace_assigned_locals.update(trace_assigned_locals_chunk)

        # note: chunk size will log low if there are more spec temp vars than extra_columns
        trace_label = 'eval_and_sum chunk_%s' % i
        chunk.log_open(trace_label, chunk_size, effective_chunk_size)
        chunk.log_df(trace_label, 'df_chunk', df_chunk)
        chunk.log_df(trace_label, 'assigned_chunk', assigned_chunk)
        chunk.log_close(trace_label)

    assert result_list

    # squash multiple chunk summaries
    if len(result_list) > 1:
        logger.debug("eval_and_sum squash chunk summaries")

        summary = pd.concat(result_list)

        if group_by_column_names:
            summary.reset_index(inplace=True)
            summary = summary.groupby(group_by_column_names).sum()
        else:
            summary = summary.sum().to_frame().T

    if trace_results:
        trace_results = pd.concat(trace_results)
        # trace_rows index values should match index of original df
        trace_results.index = df[trace_rows].index
    else:
        trace_results = None

    return summary, trace_results, trace_assigned_locals
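
The "squash multiple chunk summaries" step works because grouped sums are additive across chunks: concatenating the per-chunk group sums and grouping again yields the same totals as grouping the whole dataframe at once (Example 13 relies on the same property). A small self-contained pandas check with made-up data:

import pandas as pd

df = pd.DataFrame({'coc_poverty': [True, True, False, False, True],
                   'benefit': [1.0, 2.0, 3.0, 4.0, 5.0]})

# per-chunk grouped sums
partials = [part.groupby('coc_poverty').sum() for part in (df.iloc[:3], df.iloc[3:])]

# squash: concat the partial summaries, regroup, and sum again
summary = pd.concat(partials).reset_index().groupby('coc_poverty').sum()

assert summary.equals(df.groupby('coc_poverty').sum())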
Example 11
    def compute_tap_tap_time(self, recipe, access_df, egress_df,
                             chooser_attributes, path_info, trace_label,
                             trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 'compute_tap_tap_time')

        with chunk.chunk_log(trace_label):

            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')
            tap_tap_settings = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

            with memo(
                    "#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
                transit_df = self.all_transit_paths(access_df, egress_df,
                                                    chooser_attributes,
                                                    trace_label, trace)
                # note: transit_df index is arbitrary
                chunk.log_df(trace_label, "transit_df", transit_df)

            # some expressions may want to know access mode -
            locals_dict = path_info.copy()
            locals_dict['los'] = self.network_los
            locals_dict.update(model_constants)

            assignment_spec = assign.read_assignment_spec(
                file_name=config.config_file_path(tap_tap_settings['SPEC']))

            DEDUPE = True
            if DEDUPE:

                # assign uid for reduping
                max_atap = transit_df.atap.max() + 1
                transit_df[
                    'uid'] = transit_df.btap * max_atap + transit_df.atap

                # dedupe
                chooser_attribute_columns = list(chooser_attributes.columns)
                unique_transit_df = \
                    transit_df.loc[~transit_df.uid.duplicated(), ['btap', 'atap', 'uid'] + chooser_attribute_columns]
                unique_transit_df.set_index('uid', inplace=True)
                chunk.log_df(trace_label, "unique_transit_df",
                             unique_transit_df)

                logger.debug(
                    f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}"
                )

                # assign_variables
                results, _, _ = assign.assign_variables(
                    assignment_spec, unique_transit_df, locals_dict)
                assert len(results.columns) == 1
                unique_transit_df['transit'] = results

                # redupe results back into transit_df
                with memo("#TVPB compute_tap_tap_time redupe transit_df"):
                    transit_df['transit'] = reindex(unique_transit_df.transit,
                                                    transit_df.uid)

                del transit_df['uid']
                del unique_transit_df
                chunk.log_df(trace_label, "transit_df", transit_df)
                chunk.log_df(trace_label, "unique_transit_df", None)

            else:
                results, _, _ = assign.assign_variables(
                    assignment_spec, transit_df, locals_dict)
                assert len(results.columns) == 1
                transit_df['transit'] = results

            # filter out unavailable btap_atap pairs
            logger.debug(
                f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
            )
            transit_df = transit_df[transit_df.transit > 0]

            transit_df.drop(columns=chooser_attributes.columns, inplace=True)

            chunk.log_df(trace_label, "transit_df", None)

            if trace:
                self.trace_df(transit_df, trace_label, 'transit_df')

        return transit_df
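
The DEDUPE branch encodes each (btap, atap) pair as a single integer uid (btap * max_atap + atap is unique because atap < max_atap), evaluates the spec only once per unique pair, and then maps the results back to every original row by uid. A hedged, self-contained sketch of the same encode/evaluate/redupe pattern; it uses pandas Series.reindex for the lookup (the example above uses ActivitySim's reindex helper), and the 'transit' computation is a stand-in for assign_variables:

import pandas as pd

transit_df = pd.DataFrame({'btap': [5, 5, 7, 5], 'atap': [2, 3, 2, 2]})

# encode each (btap, atap) pair as a unique integer uid
max_atap = transit_df.atap.max() + 1
transit_df['uid'] = transit_df.btap * max_atap + transit_df.atap

# evaluate the (expensive) expression only once per unique pair
unique_df = transit_df.loc[~transit_df.uid.duplicated(),
                           ['btap', 'atap', 'uid']].set_index('uid')
unique_df['transit'] = unique_df.btap * 10.0 + unique_df.atap  # stand-in for assign_variables

# redupe: look the results back up for every original row by uid
transit_df['transit'] = unique_df.transit.reindex(transit_df.uid).to_numpy()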
Example 12
    def compute_maz_tap_utilities(self, recipe, maz_od_df, chooser_attributes,
                                  leg, mode, trace_label, trace):

        trace_label = tracing.extend_trace_label(trace_label,
                                                 f'maz_tap_utils.{leg}')

        with chunk.chunk_log(trace_label):

            maz_tap_settings = \
                self.network_los.setting(f'TVPB_SETTINGS.{recipe}.maz_tap_settings.{mode}')
            chooser_columns = maz_tap_settings['CHOOSER_COLUMNS']
            attribute_columns = list(
                chooser_attributes.columns
            ) if chooser_attributes is not None else []
            model_constants = self.network_los.setting(
                f'TVPB_SETTINGS.{recipe}.CONSTANTS')

            if leg == 'access':
                maz_col = 'omaz'
                tap_col = 'btap'
            else:
                maz_col = 'dmaz'
                tap_col = 'atap'

            # maz_to_tap access/egress utilities
            # deduped utilities_df - one row per chooser for each boarding tap (btap) accessible from omaz
            utilities_df = self.network_los.maz_to_tap_dfs[mode]

            utilities_df = utilities_df[chooser_columns]. \
                reset_index(drop=False). \
                rename(columns={'MAZ': maz_col, 'TAP': tap_col})
            utilities_df = pd.merge(maz_od_df[['idx',
                                               maz_col]].drop_duplicates(),
                                    utilities_df,
                                    on=maz_col,
                                    how='inner')
            # add any supplemental chooser attributes (e.g. demographic_segment, tod)
            for c in attribute_columns:
                utilities_df[c] = reindex(chooser_attributes[c],
                                          utilities_df['idx'])

            chunk.log_df(trace_label, "utilities_df", utilities_df)

            if self.units_for_recipe(recipe) == 'utility':

                utilities_df[leg] = compute_utilities(
                    self.network_los,
                    maz_tap_settings,
                    utilities_df,
                    model_constants=model_constants,
                    trace_label=trace_label,
                    trace=trace,
                    trace_column_names=['idx', maz_col, tap_col]
                    if trace else None)

                chunk.log_df(trace_label, "utilities_df",
                             utilities_df)  # annotated

            else:

                assignment_spec = \
                    assign.read_assignment_spec(file_name=config.config_file_path(maz_tap_settings['SPEC']))

                results, _, _ = assign.assign_variables(
                    assignment_spec, utilities_df, model_constants)
                assert len(results.columns) == 1
                utilities_df[leg] = results

            chunk.log_df(trace_label, "utilities_df", utilities_df)

            if trace:
                self.trace_df(utilities_df, trace_label, 'utilities_df')

            # drop utility computation columns ('tod', 'demographic_segment' and maz_to_tap_df time/distance columns)
            utilities_df.drop(columns=attribute_columns + chooser_columns,
                              inplace=True)

        return utilities_df
Example 13
def physical_activity_processor(trips_with_demographics, persons_merged,
                                physical_activity_trip_spec,
                                physical_activity_person_spec,
                                physical_activity_settings, coc_column_names,
                                settings, chunk_size, trace_hh_id):
    """
    Compute physical activity benefits

    Physical activity benefits generally accrue if the net physical activity for an individual
    exceeds a certain threshold. We calculate individual physical activity based on trips,
    so we need to compute trip activity and then sum up to the person level to calculate benefits.
    We chunk trips by household id to ensure that all of a person's trips are in the same chunk.
    """

    trips_df = trips_with_demographics.to_frame()
    persons_df = persons_merged.to_frame()
    trace_label = 'physical_activity'

    logger.info(
        "Running physical_activity_processor with %d trips for %d persons " %
        (len(trips_df), len(persons_df)))

    locals_dict = config.get_model_constants(physical_activity_settings)
    locals_dict.update(config.setting('globals'))

    trip_trace_rows = trace_hh_id and trips_df.household_id == trace_hh_id

    rows_per_chunk, effective_chunk_size = \
        physical_activity_rpc(chunk_size, trips_df, persons_df,
                              physical_activity_trip_spec, trace_label)

    logger.info("physical_activity_processor chunk_size %s rows_per_chunk %s" %
                (chunk_size, rows_per_chunk))

    coc_summary = None
    result_list = []

    # iterate over trips df chunked by hh_id
    for i, num_chunks, trips_chunk, trace_rows_chunk \
            in bca.chunked_df_by_chunk_id(trips_df, trip_trace_rows, rows_per_chunk):

        logger.info("%s chunk %s of %s" % (trace_label, i, num_chunks))

        trip_activity, trip_trace_results, trip_trace_assigned_locals = \
            assign.assign_variables(physical_activity_trip_spec,
                                    trips_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='trips',
                                    trace_rows=trace_rows_chunk)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks
        if trip_trace_results is not None:
            tracing.write_csv(trip_trace_results,
                              file_name="physical_activity_trips",
                              index_label='trip_id',
                              column_labels=['label', 'trip'])

            if trip_trace_assigned_locals:
                tracing.write_csv(trip_trace_assigned_locals,
                                  file_name="physical_activity_trips_locals")

        # sum trip activity for each unique person
        trip_activity = trip_activity.groupby(trips_chunk.person_id).sum()

        # merge in persons columns for this chunk
        persons_chunk = pd.merge(trip_activity,
                                 persons_df,
                                 left_index=True,
                                 right_index=True)

        # trace rows array for this chunk
        person_trace_rows = trace_hh_id and persons_chunk[
            'household_id'] == trace_hh_id

        person_activity, person_trace_results, person_trace_assigned_locals = \
            assign.assign_variables(physical_activity_person_spec,
                                    persons_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='persons',
                                    trace_rows=person_trace_rows)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks
        if person_trace_results is not None:
            tracing.write_csv(person_trace_results,
                              file_name="physical_activity_persons",
                              index_label='persons_merged_table_index',
                              column_labels=['label', 'person'])

            if person_trace_assigned_locals:
                tracing.write_csv(person_trace_assigned_locals,
                                  file_name="physical_activity_persons_locals")

        # concat in the coc columns and summarize the chunk by coc
        person_activity = pd.concat(
            [persons_chunk[coc_column_names], person_activity], axis=1)
        coc_summary = person_activity.groupby(coc_column_names).sum()

        result_list.append(coc_summary)

        chunk_trace_label = '%s chunk_%s' % (trace_label, i)
        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)
        chunk.log_df(chunk_trace_label, 'trips_chunk', trips_chunk)
        chunk.log_df(chunk_trace_label, 'persons_chunk', persons_chunk)
        chunk.log_close(chunk_trace_label)

    if len(result_list) > 1:

        # (if there was only one chunk, then concat is redundant)
        coc_summary = pd.concat(result_list)

        # squash the accumulated chunk summaries by reapplying group and sum
        coc_summary.reset_index(inplace=True)
        coc_summary = coc_summary.groupby(coc_column_names).sum()

    result_prefix = 'PA_'
    add_result_columns("coc_results", coc_summary, result_prefix)
    add_summary_results(coc_summary,
                        prefix=result_prefix,
                        spec=physical_activity_person_spec)
Example 14
def best_transit_path(set_random_seed, network_los, best_transit_path_spec):

    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    omaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    dmaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    tod = np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE)
    od_df = pd.DataFrame({'omaz': omaz, 'dmaz': dmaz, 'tod': tod})

    trace_od = (od_df.omaz[0], od_df.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s" % trace_od)

    # build exploded atap_btap_df

    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    atap_btap_df = network_los.get_tappairs_mazpairs(
        od_df.omaz,
        od_df.dmaz,
        ofilter='drive_time',
        dfilter='walk_alightingActual')

    # add in tod column
    atap_btap_df = atap_btap_df.merge(right=od_df[['tod']],
                                      left_on='idx',
                                      right_index=True,
                                      how='left')

    logger.info("len od_df %s", len(od_df.index))
    logger.info("len atap_btap_df %s", len(atap_btap_df.index))
    logger.info("avg explosion %s",
                (len(atap_btap_df.index) / (1.0 * len(od_df.index))))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_oabd_rows = (atap_btap_df.omaz
                           == trace_orig) & (atap_btap_df.dmaz == trace_dest)
    else:
        trace_oabd_rows = None

    constants = config.get_model_constants(model_settings)
    locals_d = {'np': np, 'network_los': network_los}
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(best_transit_path_spec, atap_btap_df, locals_d,
                                  trace_rows=trace_oabd_rows)

    # copy results
    for column in results.columns:
        atap_btap_df[column] = results[column]

    # drop rows if no utility
    n = len(atap_btap_df.index)
    atap_btap_df = atap_btap_df.dropna(subset=['utility'])

    logger.info("Dropped %s of %s rows with null utility",
                n - len(atap_btap_df.index), n)

    # choose max utility
    atap_btap_df = atap_btap_df.sort_values(
        by='utility').groupby('idx').tail(1)

    if trace_od:

        if not trace_oabd_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s",
                           trace_orig, trace_dest)
        else:

            tracing.trace_df(atap_btap_df,
                             label='best_transit_path',
                             slicer='NONE',
                             transpose=False)

            tracing.trace_df(trace_results,
                             label='trace_best_transit_path',
                             slicer='NONE',
                             transpose=False)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="trace_best_transit_path_locals")
Example 15
def aggregate_od_processor(zone_districts, zones, data_dir, trace_od):

    trace_label = 'aggregate_od'

    logger.info("Running %s" % (trace_label, ))

    model_settings = config.read_model_settings('aggregate_od.yaml')

    spec_file_name = model_settings.get('spec_file_name', 'aggregate_od.csv')
    aggregate_od_spec = bca.read_assignment_spec(spec_file_name)

    zones = zones.to_frame()
    zone_districts = zone_districts.to_frame()
    zone_count = zone_districts.shape[0]

    assert zones.index.equals(zone_districts.index)

    # create OD dataframe in an order compatible with ODSkims
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(zones.index), zone_count),
            'dest': np.tile(np.asanyarray(zones.index), zone_count),
        })

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(model_settings)
    locals_dict.update(config.setting('globals'))
    locals_dict['logger'] = logger

    logger.debug('%s mem before create_skim_locals_dict, %s' % (
        trace_label,
        memory_info(),
    ))

    # - add ODSkims to locals (note: we use local_skims list later to close omx files)
    cache_skims = model_settings.get('cache_skims', False)
    local_skims = create_skim_locals_dict(model_settings, data_dir, zones,
                                          cache_skims)
    locals_dict.update(local_skims)

    # - create_zone_matrices dicts
    locals_dict.update(create_zone_matrices(model_settings, zones))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    logger.debug("%s assigning variables" % (trace_label, ))
    results, trace_results, trace_assigned_locals = \
        assign.assign_variables(aggregate_od_spec,
                                od_df,
                                locals_dict=locals_dict,
                                df_alias='od',
                                trace_rows=trace_od_rows)

    logger.debug('%s mem after assign_variables, %s' % (
        trace_label,
        memory_info(),
    ))

    for local_name, od_skims in local_skims.items():
        logger.debug("closing %s" % local_name)
        od_skims.log_skim_usage()
        od_skims.close()

    # summarize aggregate_od_benefits by orig and dest districts
    logger.debug("%s district summary" % (trace_label, ))
    results['orig'] = np.repeat(np.asanyarray(zone_districts.district),
                                zone_count)
    results['dest'] = np.tile(np.asanyarray(zone_districts.district),
                              zone_count)
    district_summary = results.groupby(['orig', 'dest']).sum()
    pipeline.replace_table('aggregate_od_district_summary', district_summary)

    # attribute aggregate_results benefits to origin zone
    logger.debug("%s zone summary" % (trace_label, ))
    results['orig'] = od_df['orig']
    del results['dest']
    zone_summary = results.groupby(['orig']).sum()
    pipeline.replace_table('aggregate_od_zone_summary', zone_summary)

    add_aggregate_results(zone_summary, aggregate_od_spec, source=trace_label)

    if trace_results is not None:
        tracing.write_csv(trace_results,
                          file_name=trace_label,
                          index_label='index',
                          column_labels=['label', 'od'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="%s_locals" % trace_label,
                              index_label='variable',
                              columns='value')
Example 16
def compute_accessibility(settings, accessibility_spec, accessibility_settings,
                          skim_dict, omx_file, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.
    """

    logger.info("Running compute_accessibility")

    constants = config.get_model_constants(accessibility_settings)
    land_use_columns = accessibility_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    zone_count = len(land_use_df.index)

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(land_use_df.index), zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest',
                     right_index=True).sort_index()

    locals_d = {
        'log':
        np.log,
        'exp':
        np.exp,
        'skim_od':
        AccessibilitySkims(skim_dict, omx_file, zone_count),
        'skim_do':
        AccessibilitySkims(skim_dict, omx_file, zone_count, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(accessibility_spec, od_df, locals_d, trace_rows=trace_od_rows)
    accessibility_df = pd.DataFrame(index=land_use.index)
    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (zone_count, zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

        inject.add_column("accessibility", column, accessibility_df[column])

    if trace_od:

        if not trace_od_rows.any():
            logger.warn("trace_od not found origin = %s, dest = %s" %
                        (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            # note that this is not the same as the orca-injected accessibility table
            # FIXME - should we name this differently and also dump the updated accessibility table?
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals,
                                  file_name="accessibility_locals")
Example 17
def compute_columns(df, model_settings, configs_dir, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        or if None, expect name of pipeline table to be specified by DF in model_settings
    model_settings : dict or str
        dict with keys:
            DF - df_alias and (additionally, if df is None) name of pipeline table to load as df
            SPEC - name of expressions file (csv suffix optional) if different from model_settings
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str:
            name of yaml file in configs_dir to load dict from
    configs_dir
    trace_label

    Returns
    -------
    results: pandas.DataFrame
        one column for each expression (except temps with ALL_CAP target names)
        same index as df
    """

    if isinstance(model_settings, str):
        model_settings_name = model_settings
        model_settings = config.read_model_settings(configs_dir, '%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % model_settings_name
    else:
        model_settings_name = 'dict'

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % model_settings_name

    df_name = model_settings.get('DF')
    helper_table_names = model_settings.get('TABLES', [])
    expressions_spec_name = model_settings.get('SPEC', model_settings_name)

    assert expressions_spec_name is not None, \
        "Expected to find 'SPEC' in %s" % model_settings_name

    if trace_label is None:
        trace_label = expressions_spec_name

    if not expressions_spec_name.endswith(".csv"):
        expressions_spec_name = '%s.csv' % expressions_spec_name
    expressions_spec = assign.read_assignment_spec(os.path.join(configs_dir, expressions_spec_name))

    tables = {t: inject.get_table(t).to_frame() for t in helper_table_names}

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_name not in tables, "Did not expect to find df '%s' in TABLES" % df_name
    tables[df_name] = df

    locals_dict = local_utilities()
    locals_dict.update(tables)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(expressions_spec,
                                  df,
                                  locals_dict,
                                  trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results,
                         label=trace_label,
                         slicer='NONE',
                         warn_if_empty=True)

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="%s_locals" % trace_label)

    return results
Example 18
def compute_columns(df, model_settings, locals_dict={}, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        or if None, expect name of pipeline table to be specified by DF in model_settings
    model_settings : dict or str
        dict with keys:
            DF - df_alias and (additionally, if df is None) name of pipeline table to load as df
            SPEC - name of expressions file (csv suffix optional) if different from model_settings
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str:
            name of yaml file in configs_dir to load dict from
    locals_dict : dict
        dict of locals (e.g. utility functions) to add to the execution environment
    trace_label

    Returns
    -------
    results: pandas.DataFrame
        one column for each expression (except temps with ALL_CAP target names)
        same index as df
    """

    if isinstance(model_settings, str):
        model_settings_name = model_settings
        model_settings = config.read_model_settings('%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % model_settings_name
    else:
        model_settings_name = 'dict'
        assert isinstance(model_settings, dict)

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % model_settings_name

    df_name = model_settings.get('DF')
    helper_table_names = model_settings.get('TABLES', [])
    expressions_spec_name = model_settings.get('SPEC', None)

    assert expressions_spec_name is not None, \
        "Expected to find 'SPEC' in %s" % model_settings_name

    trace_label = tracing.extend_trace_label(trace_label or '',
                                             expressions_spec_name)

    if not expressions_spec_name.endswith(".csv"):
        expressions_spec_name = '%s.csv' % expressions_spec_name
    logger.debug(
        f"{trace_label} compute_columns using expression spec file {expressions_spec_name}"
    )
    expressions_spec = assign.read_assignment_spec(
        config.config_file_path(expressions_spec_name))

    assert expressions_spec.shape[0] > 0, \
        "Expected to find some assignment expressions in %s" % expressions_spec_name

    tables = {t: inject.get_table(t).to_frame() for t in helper_table_names}

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_name not in tables, "Did not expect to find df '%s' in TABLES" % df_name
    tables[df_name] = df

    # be nice and also give it to them as df?
    tables['df'] = df

    _locals_dict = assign.local_utilities()
    _locals_dict.update(locals_dict)
    _locals_dict.update(tables)

    # FIXME a number of asim model preprocessors want skim_dict - should they request it in model_settings.TABLES?
    _locals_dict.update({
        # 'los': inject.get_injectable('network_los', None),
        'skim_dict': inject.get_injectable('skim_dict', None),
    })

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(expressions_spec,
                                  df,
                                  _locals_dict,
                                  trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results, label=trace_label, slicer='NONE')

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals,
                          file_name="%s_locals" % trace_label)

    return results
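
Per the docstring, model_settings can be either the name of a yaml file or a dict with DF, SPEC and TABLES keys. A hedged illustration of such a settings block (the spec file and table names below are hypothetical, not from the source); the call itself is shown commented out because compute_columns also needs the config files and injected pipeline tables to exist:

model_settings = {
    'DF': 'persons',             # df_alias used inside the expression file
    'SPEC': 'annotate_persons',  # resolved to annotate_persons.csv in the configs dir
    'TABLES': ['land_use'],      # pipeline tables exposed as read-only locals
}

# results = compute_columns(df=persons_df,
#                           model_settings=model_settings,
#                           trace_label='annotate_persons')
# assign_in_place(persons_df, results)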