def create_od_table(od_index, spec, locals_dict, trace_od):
    """
    Evaluate assignment expressions for every OD pair and register the result.

    Parameters
    ----------
    od_index : pandas MultiIndex
        index of origin-destination pairs
    spec : pandas DataFrame
        assignment expressions
    locals_dict : dict
        constants and zone matrices made available to the expressions
    trace_od : list or dict
        origin-destination pair to trace

    Returns
    -------
    od_table : pandas DataFrame
        assigned variables for all origin-destination pairs, indexed by od_index
    """
    logger.info('creating OD table ...')

    # expose the index levels as ordinary columns for the expressions
    pairs_df = od_index.to_frame(index=False)
    rows_to_trace = trace.trace_filter(pairs_df, trace_od)

    od_table, traced, _ = assign.assign_variables(
        spec,
        pairs_df,
        locals_dict=locals_dict,
        trace_rows=rows_to_trace,
    )

    if traced is not None:
        tracing.write_csv(traced, file_name='od_table', transpose=False)

    # put the caller's index back on the assigned results
    od_table.set_index(od_index, inplace=True)

    logger.info('registering OD table to pipeline ...')
    pipeline.replace_table('od_table', od_table)

    # the zone summary is built from the table with its index flattened to columns
    create_zone_summary(od_table.reset_index())

    return od_table
def aggregate_zone_processor(zones, trace_od):
    """
    Apply the aggregate zone spec to the zones table and store the result.

    Parameters
    ----------
    zones : orca table
        zone data for base and build scenario dat files combined into a single
        dataframe with column names prefixed with base_ or build_, indexed by ZONE
    trace_od : pair or falsy
        (origin, destination) zones whose rows should be traced, if any

    The assigned results replace the 'aggregate_zone_summary' pipeline table.
    """
    trace_label = 'aggregate_zone'

    model_settings = config.read_model_settings('aggregate_zone.yaml')
    aggregate_zone_spec = bca.read_assignment_spec(
        model_settings.get('spec_file_name', 'aggregate_zone.csv'))

    zones_df = zones.to_frame()

    message = "Running aggregate_zone_processor with %d zones" % (len(zones_df.index), )
    logger.info(message)

    # trace any zone that is either end of the traced OD pair
    trace_od_rows = None
    if trace_od:
        trace_orig, trace_dest = trace_od
        is_orig = zones_df.index == trace_orig
        is_dest = zones_df.index == trace_dest
        trace_od_rows = is_orig | is_dest

    # names visible to the spec expressions: model constants plus global settings
    locals_dict = config.get_model_constants(model_settings)
    locals_dict.update(config.setting('globals'))

    assigned = assign.assign_variables(aggregate_zone_spec,
                                       zones_df,
                                       locals_dict,
                                       df_alias='zones',
                                       trace_rows=trace_od_rows)
    results, trace_results, trace_assigned_locals = assigned

    pipeline.replace_table('aggregate_zone_summary', results)

    if trace_results is not None:
        tracing.write_csv(trace_results,
                          file_name="aggregate_zone",
                          index_label='zone',
                          column_labels=['label', 'zone'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="aggregate_zone_locals")
def compute_tap_tap_time(self, recipe, access_df, egress_df, chooser_attributes, trace_label, trace):
    """
    Evaluate the tap-to-tap assignment spec for all transit paths.

    Parameters
    ----------
    recipe : str
        key under TVPB_SETTINGS selecting the CONSTANTS and tap_tap_settings to use
    access_df, egress_df, chooser_attributes
        passed through to self.all_transit_paths; the columns of
        chooser_attributes are dropped from the returned frame
    trace_label : str
    trace : bool
        when truthy, the result is handed to self.trace_df

    Returns
    -------
    transit_df : pandas DataFrame
        rows with a positive 'transit' value, without the chooser attribute columns
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_tap_tap_time')

    model_constants = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.CONSTANTS')
    tap_tap_settings = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

    with memo("#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
        transit_df = self.all_transit_paths(access_df, egress_df, chooser_attributes,
                                            trace_label, trace)
        # note: transit_df index is arbitrary
    chunk.log_df(trace_label, "transit_df", transit_df)

    # names visible to the spec expressions
    locals_d = {'los': self.network_los}
    locals_d.update(model_constants)

    assignment_spec = assign.read_assignment_spec(
        file_name=config.config_file_path(tap_tap_settings['SPEC']))

    results, _, _ = assign.assign_variables(assignment_spec, transit_df, locals_d)

    # the spec must yield exactly one column
    # (was `len(results.columns == 1)`, which is truthy for any non-empty result)
    assert len(results.columns) == 1
    transit_df['transit'] = results

    # filter out unavailable btap_atap pairs
    logger.debug(
        f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
    )
    transit_df = transit_df[transit_df.transit > 0]

    transit_df.drop(columns=chooser_attributes.columns, inplace=True)

    chunk.log_df(trace_label, "transit_df", None)

    if trace:
        self.trace_df(transit_df, trace_label, 'transit_df')

    return transit_df
def aggregate_demographics_processor(zone_hhs, aggregate_demographics_spec, settings, trace_od):
    """
    Apply the aggregate demographics spec to the zone households table.

    Parameters
    ----------
    zone_hhs : orca table
        input zone demographics
    aggregate_demographics_spec
        assignment expressions, also passed to add_aggregate_results
    settings
        not referenced in this function
    trace_od : pair or falsy
        (origin, destination) zones whose rows should be traced, if any

    The assigned results replace the "zone_demographics" pipeline table.
    """
    trace_label = 'aggregate_demographics'
    model_settings = config.read_model_settings('aggregate_demographics.yaml')

    zone_hhs_df = zone_hhs.to_frame()

    message = "Running %s with %d zones" % (trace_label, len(zone_hhs_df), )
    logger.info(message)

    # trace any zone that is either end of the traced OD pair
    trace_od_rows = None
    if trace_od:
        trace_orig, trace_dest = trace_od
        is_orig = zone_hhs_df.index == trace_orig
        is_dest = zone_hhs_df.index == trace_dest
        trace_od_rows = is_orig | is_dest

    # names visible to the spec expressions: model constants plus global settings
    locals_dict = config.get_model_constants(model_settings)
    locals_dict.update(config.setting('globals'))

    assigned = assign.assign_variables(aggregate_demographics_spec,
                                       zone_hhs_df,
                                       locals_dict,
                                       df_alias='hhs',
                                       trace_rows=trace_od_rows)
    results, trace_results, trace_assigned_locals = assigned

    pipeline.replace_table("zone_demographics", results)

    # expression file can use silos column to designate result targets
    # (e.g. count of households)
    add_aggregate_results(results, aggregate_demographics_spec, source=trace_label)

    if trace_results is not None:
        tracing.write_csv(trace_results,
                          file_name="aggregate_demographics",
                          index_label='zone',
                          column_labels=['label', 'zone'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="aggregate_demographics_locals")
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.

    Parameters
    ----------
    accessibility : table
        its index supplies the origin zones; one column per spec result is added
    skim_dict
        handed to AccessibilitySkims for the skim_od / skim_do expression locals
    land_use : table
        its index supplies the destination zones
    trace_od : pair or falsy
        (origin, destination) to trace, if any

    The updated frame replaces the "accessibility" pipeline table.
    """
    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    # accessibility rows are the origins (this count was previously labelled "dest zones")
    logger.info("Running %s with %d orig zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe: every origin paired with every destination, origin-major
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    # sort_index restores the origin-major row order that the reshape below relies on
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        # reshape returns a new (orig, dest) array instead of assigning .shape in place
        data = np.asanyarray(results[column]).reshape(orig_zone_count, dest_zone_count)
        # sum over destinations; the +1 keeps the logged value at or above zero
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:
            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
def best_transit_path(set_random_seed, network_los, best_transit_path_spec):
    """
    Pick the highest-utility tap pair for a random sample of maz OD pairs.

    VECTOR_TEST_SIZE origin and destination mazs are sampled (with replacement)
    from network_los.maz_df together with a random 'AM'/'PM' time of day. The
    spec is evaluated on every candidate tap pair, and the row with the largest
    'utility' is kept for each sampled pair. The first sampled pair is always
    traced.

    Parameters
    ----------
    set_random_seed
        not referenced in the body; presumably requested so that seeding happens
        before sampling (TODO confirm)
    network_los
        supplies maz_df and get_tappairs_mazpairs
    best_transit_path_spec
        assignment expressions; must produce a 'utility' column
    """
    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    omaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    dmaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    tod = np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE)
    od_df = pd.DataFrame({'omaz': omaz, 'dmaz': dmaz, 'tod': tod})

    # trace the first sampled pair; the former `if trace_od:` tests were always true
    # because a 2-tuple is truthy, so the untraced branch could never run
    trace_orig, trace_dest = od_df.omaz[0], od_df.dmaz[0]
    logger.info("trace_od omaz %s dmaz %s", trace_orig, trace_dest)

    # build exploded atap_btap_df

    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    atap_btap_df = network_los.get_tappairs_mazpairs(od_df.omaz, od_df.dmaz,
                                                     ofilter='drive_time',
                                                     dfilter='walk_alightingActual')

    # add in tod column
    atap_btap_df = atap_btap_df.merge(
        right=od_df[['tod']],
        left_on='idx',
        right_index=True,
        how='left'
    )

    logger.info("len od_df %s", len(od_df.index))
    logger.info("len atap_btap_df %s", len(atap_btap_df.index))
    logger.info("avg explosion %s", (len(atap_btap_df.index) / (1.0 * len(od_df.index))))

    trace_oabd_rows = (atap_btap_df.omaz == trace_orig) & (atap_btap_df.dmaz == trace_dest)

    constants = config.get_model_constants(model_settings)
    locals_d = {
        'np': np,
        'network_los': network_los
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(best_transit_path_spec, atap_btap_df, locals_d,
                                  trace_rows=trace_oabd_rows)

    # copy results
    for column in results.columns:
        atap_btap_df[column] = results[column]

    # drop rows if no utility
    n = len(atap_btap_df.index)
    atap_btap_df = atap_btap_df.dropna(subset=['utility'])

    logger.info("Dropped %s of %s rows with null utility", n - len(atap_btap_df.index), n)

    # choose max utility: after an ascending sort the last row of each group is the best
    atap_btap_df = atap_btap_df.sort_values(by='utility').groupby('idx').tail(1)

    if not trace_oabd_rows.any():
        logger.warning("trace_od not found origin = %s, dest = %s", trace_orig, trace_dest)
    else:
        tracing.trace_df(atap_btap_df,
                         label='best_transit_path',
                         slicer='NONE',
                         transpose=False)

        tracing.trace_df(trace_results,
                         label='trace_best_transit_path',
                         slicer='NONE',
                         transpose=False)

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals,
                              file_name="trace_best_transit_path_locals")
def compute_accessibility(accessibility, network_los, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.

    Parameters
    ----------
    accessibility : table
        its index supplies the origin zones; one column per spec result is added
    network_los
        supplies the default skim dict (and zone_system) for the expression locals
    land_use : table
        its index supplies the destination zones
    trace_od : pair or falsy
        (origin, destination) to trace, if any

    The updated frame replaces the "accessibility" pipeline table.
    """
    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(
        config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    # accessibility rows are the origins (this count was previously labelled "dest zones")
    logger.info("Running %s with %d orig zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()
    land_use_df = land_use_df[land_use_columns]

    # don't assume they are the same: accessibility may be sliced if we are multiprocessing
    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe: every origin paired with every destination, origin-major
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    # sort_index restores the origin-major row order that the reshape below relies on
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }

    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = TransitVirtualPathBuilder(network_los)

    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        # reshape returns a new (o,d) array instead of assigning .shape in place
        data = np.asanyarray(results[column]).reshape(orig_zone_count, dest_zone_count)
        # sum over destinations; the +1 keeps the logged value at or above zero
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # the original literal lacked the f prefix and a closing parenthesis,
    # so the placeholders were logged verbatim
    logger.info(f"{trace_label} added {len(results.columns)} columns")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(
                f"trace_od not found origin = {trace_orig}, dest = {trace_dest}"
            )
        else:
            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
def compute_accessibilities_for_zones(accessibility_df, land_use_df, assignment_spec,
                                      constants, network_los, trace_od, trace_label):
    """
    Evaluate the accessibility spec over all OD pairs and aggregate by origin.

    Parameters
    ----------
    accessibility_df : pandas DataFrame
        index supplies the origin zones; modified in place (one column is added
        per spec result) and also returned
    land_use_df : pandas DataFrame
        index supplies the destination zones; its columns are merged onto each
        OD row by destination
    assignment_spec
        assignment expressions
    constants : dict or None
        extra names for the expressions; None is treated as no constants
    network_los
        supplies the default skim dict, zone_system and tvpb
    trace_od : pair or falsy
        (origin, destination) to trace, if any
    trace_label : str

    Returns
    -------
    accessibility_df : pandas DataFrame
    """
    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d orig zones %d dest zones" %
                (trace_label, orig_zone_count, dest_zone_count))

    # create OD dataframe: every origin paired with every destination, origin-major
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    # sort_index restores the origin-major row order that the reshape below relies on
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()
    chunk.log_df(trace_label, "od_df", od_df)

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }
    # guard: dict.update(None) would raise TypeError
    if constants is not None:
        locals_d.update(constants)

    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = network_los.tvpb

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d,
                                  trace_rows=trace_od_rows, trace_label=trace_label,
                                  chunk_log=True)

    chunk.log_df(trace_label, "results", results)

    for column in results.columns:
        # reshape returns a new (o,d) array instead of assigning .shape in place
        data = np.asanyarray(results[column]).reshape(orig_zone_count, dest_zone_count)
        # sum over destinations; the +1 keeps the logged value at or above zero
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(
                f"trace_od not found origin = {trace_orig}, dest = {trace_dest}"
            )
        else:
            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")

    return accessibility_df
def demographics_processor(persons, persons_merged, demographics_spec,
                           demographics_settings, chunk_size, trace_hh_id):
    """
    Assign demographic (coc) columns to persons and tabulate person counts by coc group.

    The spec is evaluated against persons_merged. The assigned columns are added
    to the persons table, their names are published as the "coc_column_names"
    injectable, and a "coc_results" table holding the number of persons in each
    combination of coc values is written to the pipeline.

    Parameters
    ----------
    persons, persons_merged : tables
    demographics_spec
        assignment expressions
    demographics_settings
        source of the model constants
    chunk_size
        only reported in the log message here
    trace_hh_id
        household id to trace, or falsy for no tracing
    """
    # the choice model will be applied to each row of the choosers table (a pandas.DataFrame)
    persons_df = persons_merged.to_frame()

    message = "Running demographics_processor with %d persons (chunk size = %s)" \
        % (len(persons_df), chunk_size)
    logger.info(message)

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(demographics_settings)
    locals_dict.update(config.setting('globals'))

    # boolean mask of the traced household's persons (or the falsy trace_hh_id itself)
    if trace_hh_id:
        trace_rows = persons_df['household_id'] == trace_hh_id
    else:
        trace_rows = trace_hh_id

    # eval_variables evaluates each of the expressions in spec
    # in the context of each row in of the choosers dataframe
    assigned = assign.assign_variables(demographics_spec,
                                       persons_df,
                                       locals_dict,
                                       df_alias='persons',
                                       trace_rows=trace_rows)
    results, trace_results, trace_assigned_locals = assigned

    # add assigned columns to persons as they are needed by downstream processors
    persons = persons.to_frame()
    assign_in_place(persons, results)
    pipeline.replace_table("persons", persons)

    # coc groups with counts
    # TODO - should we allow specifying which assigned columns are coc (e.g. in settings?)
    # for now, assume all assigned columns are coc, but this could cramp modelers style
    # if they want to create additional demographic columns for downstream use that aren't coc
    coc_columns = list(results.columns)

    inject.add_injectable("coc_column_names", coc_columns)

    # - create table with coc columns as indexes and a single column 'persons' with counts
    # index                        persons
    # coc_poverty coc_age
    # False       False            20
    #             True              3
    # True        False             4
    first_coc_column = coc_columns[0]
    group_counts = results.groupby(coc_columns)[first_coc_column].count()
    coc_grouped = group_counts.to_frame(name='persons')

    pipeline.replace_table("coc_results", coc_grouped)

    add_summary_results(coc_grouped)

    if trace_hh_id:

        if trace_results is not None:
            tracing.write_csv(trace_results,
                              file_name="demographics",
                              index_label='person_idx',
                              column_labels=['label', 'person'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals, file_name="demographics_locals")
def eval_and_sum(assignment_expressions, df, locals_dict, group_by_column_names=None,
                 df_alias=None, chunk_size=0, trace_rows=None):
    """
    Evaluate assignment_expressions against df in chunks and sum the results.

    When group_by_column_names is given, sums are computed per group
    (e.g. group by coc column names and return sums grouped by community of concern).

    Parameters
    ----------
    assignment_expressions
    df
    locals_dict
    group_by_column_names : array of str
        list of names of the columns to group by (e.g. coc_column_names of trip_coc_end)
    df_alias : str
        assign_variables df_alias (name of df in assignment_expressions)
    chunk_size : int
    trace_rows : array of bool
        array indicating which rows in df are to be traced

    Returns
    -------
    summary
        grouped sums, or a single-row frame of column sums when not grouping
    trace_results
        concatenated per-chunk trace results indexed like df[trace_rows],
        or None if no chunk produced any
    trace_assigned_locals : dict
        merged per-chunk traced locals
    """
    if group_by_column_names is None:
        group_by_column_names = []

    rows_per_chunk, effective_chunk_size = calc_rows_per_chunk(
        chunk_size, df, assignment_expressions,
        extra_columns=len(group_by_column_names),
        trace_label='eval_and_sum')

    logger.info("eval_and_sum chunk_size %s rows_per_chunk %s df rows %s" %
                (effective_chunk_size, rows_per_chunk, df.shape[0]))

    chunk_summaries = []
    traced_chunks = []
    trace_assigned_locals = {}

    chunks = chunked_df(df, rows_per_chunk, trace_rows)
    for i, num_chunks, df_chunk, trace_rows_chunk in chunks:

        logger.info("eval_and_sum chunk %s of %s" % (i, num_chunks))

        logger.debug("eval_and_sum chunk %s assign variables" % (i, ))
        assigned_chunk, trace_chunk, trace_assigned_locals_chunk = \
            assign.assign_variables(assignment_expressions,
                                    df_chunk,
                                    locals_dict=locals_dict,
                                    df_alias=df_alias,
                                    trace_rows=trace_rows_chunk)

        # sum this chunk
        logger.debug("eval_and_sum chunk %s sum" % (i, ))
        if not group_by_column_names:
            chunk_summary = assigned_chunk.sum().to_frame().T
        else:
            # concat in the group_by columns
            for name in group_by_column_names:
                assigned_chunk[name] = df_chunk[name]
            chunk_summary = assigned_chunk.groupby(group_by_column_names).sum()

        chunk_summaries.append(chunk_summary)

        if trace_chunk is not None:
            traced_chunks.append(trace_chunk)

        if trace_assigned_locals_chunk is not None:
            trace_assigned_locals.update(trace_assigned_locals_chunk)

        # note: chunk size will log low if there are more spec temp vars than extra_columns
        trace_label = 'eval_and_sum chunk_%s' % i
        chunk.log_open(trace_label, chunk_size, effective_chunk_size)
        chunk.log_df(trace_label, 'df_chunk', df_chunk)
        chunk.log_df(trace_label, 'assigned_chunk', assigned_chunk)
        chunk.log_close(trace_label)

    assert chunk_summaries

    # with a single chunk its summary is already the answer
    summary = chunk_summaries[-1]

    # squash multiple chunk summaries
    if len(chunk_summaries) > 1:
        logger.debug("eval_and_sum squash chunk summaries")

        summary = pd.concat(chunk_summaries)

        if group_by_column_names:
            summary.reset_index(inplace=True)
            summary = summary.groupby(group_by_column_names).sum()
        else:
            summary = summary.sum().to_frame().T

    if not traced_chunks:
        trace_results = None
    else:
        trace_results = pd.concat(traced_chunks)
        # trace_rows index values should match index of original df
        trace_results.index = df[trace_rows].index

    return summary, trace_results, trace_assigned_locals
def compute_tap_tap_time(self, recipe, access_df, egress_df, chooser_attributes, path_info,
                         trace_label, trace):
    """
    Evaluate the tap-to-tap assignment spec for all transit paths.

    The spec is evaluated once per distinct (btap, atap) pair and the result is
    mapped back onto every transit path row.

    Parameters
    ----------
    recipe : str
        key under TVPB_SETTINGS selecting the CONSTANTS and tap_tap_settings to use
    access_df, egress_df, chooser_attributes
        passed through to self.all_transit_paths; the columns of
        chooser_attributes are dropped from the returned frame
    path_info : dict
        copied to seed the expression locals
    trace_label : str
    trace : bool
        when truthy, the result is handed to self.trace_df

    Returns
    -------
    transit_df : pandas DataFrame
        rows with a positive 'transit' value, without the chooser attribute columns
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_tap_tap_time')

    with chunk.chunk_log(trace_label):

        model_constants = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.CONSTANTS')
        tap_tap_settings = self.network_los.setting(
            f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

        with memo("#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
            transit_df = self.all_transit_paths(access_df, egress_df, chooser_attributes,
                                                trace_label, trace)
            # note: transit_df index is arbitrary
        chunk.log_df(trace_label, "transit_df", transit_df)

        # some expressions may want to know access mode -
        locals_dict = path_info.copy()
        locals_dict['los'] = self.network_los
        locals_dict.update(model_constants)

        assignment_spec = assign.read_assignment_spec(
            file_name=config.config_file_path(tap_tap_settings['SPEC']))

        DEDUPE = True
        if DEDUPE:

            # assign uid for reduping: one integer per (btap, atap) pair
            # (assumes non-negative integer tap ids - TODO confirm)
            max_atap = transit_df.atap.max() + 1
            transit_df['uid'] = transit_df.btap * max_atap + transit_df.atap

            # dedupe
            chooser_attribute_columns = list(chooser_attributes.columns)
            unique_transit_df = \
                transit_df.loc[~transit_df.uid.duplicated(),
                               ['btap', 'atap', 'uid'] + chooser_attribute_columns]
            unique_transit_df.set_index('uid', inplace=True)
            chunk.log_df(trace_label, "unique_transit_df", unique_transit_df)

            logger.debug(
                f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}"
            )

            # assign_variables
            results, _, _ = assign.assign_variables(
                assignment_spec, unique_transit_df, locals_dict)
            # the spec must yield exactly one column
            # (was `len(results.columns == 1)`, which is truthy for any non-empty result)
            assert len(results.columns) == 1
            unique_transit_df['transit'] = results

            # redupe results back into transit_df
            with memo("#TVPB compute_tap_tap_time redupe transit_df"):
                transit_df['transit'] = reindex(unique_transit_df.transit, transit_df.uid)

            del transit_df['uid']
            del unique_transit_df
            chunk.log_df(trace_label, "transit_df", transit_df)
            chunk.log_df(trace_label, "unique_transit_df", None)

        else:
            results, _, _ = assign.assign_variables(
                assignment_spec, transit_df, locals_dict)
            assert len(results.columns) == 1
            transit_df['transit'] = results

        # filter out unavailable btap_atap pairs
        logger.debug(
            f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}"
        )
        transit_df = transit_df[transit_df.transit > 0]

        transit_df.drop(columns=chooser_attributes.columns, inplace=True)

        chunk.log_df(trace_label, "transit_df", None)

        if trace:
            self.trace_df(transit_df, trace_label, 'transit_df')

    return transit_df
def compute_maz_tap_utilities(self, recipe, maz_od_df, chooser_attributes, leg, mode,
                              trace_label, trace):
    """
    Compute maz-to-tap access or egress values for each chooser.

    Parameters
    ----------
    recipe : str
        key under TVPB_SETTINGS selecting the settings and CONSTANTS to use
    maz_od_df : pandas DataFrame
        must contain 'idx' and the maz column for this leg ('omaz' or 'dmaz')
    chooser_attributes : pandas DataFrame or None
        supplemental columns, looked up by 'idx' and added before evaluation
    leg : str
        'access' uses omaz/btap; any other value uses dmaz/atap
    mode : str
        selects maz_tap_settings and the maz_to_tap table
    trace_label : str
    trace : bool

    Returns
    -------
    utilities_df : pandas DataFrame
        result stored in a column named after `leg`; the attribute and chooser
        columns used for the computation are dropped
    """
    trace_label = tracing.extend_trace_label(trace_label, f'maz_tap_utils.{leg}')

    with chunk.chunk_log(trace_label):

        maz_tap_settings = \
            self.network_los.setting(f'TVPB_SETTINGS.{recipe}.maz_tap_settings.{mode}')
        chooser_columns = maz_tap_settings['CHOOSER_COLUMNS']
        attribute_columns = list(
            chooser_attributes.columns) if chooser_attributes is not None else []
        model_constants = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.CONSTANTS')

        if leg == 'access':
            maz_col = 'omaz'
            tap_col = 'btap'
        else:
            maz_col = 'dmaz'
            tap_col = 'atap'

        # maz_to_tap access/egress utilities
        # deduped utilities_df - one row per chooser for each boarding tap (btap)
        # accessible from omaz
        utilities_df = self.network_los.maz_to_tap_dfs[mode]

        utilities_df = utilities_df[chooser_columns]. \
            reset_index(drop=False). \
            rename(columns={'MAZ': maz_col, 'TAP': tap_col})
        utilities_df = pd.merge(maz_od_df[['idx', maz_col]].drop_duplicates(),
                                utilities_df,
                                on=maz_col, how='inner')

        # add any supplemental chooser attributes (e.g. demographic_segment, tod)
        for c in attribute_columns:
            utilities_df[c] = reindex(chooser_attributes[c], utilities_df['idx'])

        chunk.log_df(trace_label, "utilities_df", utilities_df)

        if self.units_for_recipe(recipe) == 'utility':

            utilities_df[leg] = compute_utilities(
                self.network_los,
                maz_tap_settings,
                utilities_df,
                model_constants=model_constants,
                trace_label=trace_label,
                trace=trace,
                trace_column_names=['idx', maz_col, tap_col] if trace else None)

            chunk.log_df(trace_label, "utilities_df", utilities_df)  # annotated

        else:

            assignment_spec = \
                assign.read_assignment_spec(
                    file_name=config.config_file_path(maz_tap_settings['SPEC']))

            results, _, _ = assign.assign_variables(
                assignment_spec, utilities_df, model_constants)
            # the spec must yield exactly one column
            # (was `len(results.columns == 1)`, which is truthy for any non-empty result)
            assert len(results.columns) == 1
            utilities_df[leg] = results

            chunk.log_df(trace_label, "utilities_df", utilities_df)

        if trace:
            self.trace_df(utilities_df, trace_label, 'utilities_df')

        # drop utility computation columns
        # ('tod', 'demographic_segment' and maz_to_tap_df time/distance columns)
        utilities_df.drop(columns=attribute_columns + chooser_columns, inplace=True)

    return utilities_df
def physical_activity_processor(trips_with_demographics, persons_merged,
                                physical_activity_trip_spec,
                                physical_activity_person_spec,
                                physical_activity_settings, coc_column_names,
                                settings, chunk_size, trace_hh_id):
    """
    Compute physical benefits

    Physical activity benefits generally accrue if the net physical activity for an individual
    exceeds a certain threshold. We calculate individual physical activity based on trips,
    so we need to compute trip activity and then sum up to the person level to calculate benefits.
    We chunk trips by household id to ensure that all of a persons trips are in the same chunk.

    Parameters
    ----------
    trips_with_demographics, persons_merged : tables
    physical_activity_trip_spec, physical_activity_person_spec
        assignment expressions applied to trips and then to per-person trip sums
    physical_activity_settings
        source of the model constants
    coc_column_names : list of str
        person columns to group the final sums by
    settings
        not referenced in this function
    chunk_size : int
    trace_hh_id
        household id to trace, or falsy for no tracing

    Results are added to "coc_results" and the summary results with prefix 'PA_'.
    """
    trips_df = trips_with_demographics.to_frame()
    persons_df = persons_merged.to_frame()
    trace_label = 'physical_activity'

    logger.info(
        "Running physical_activity_processor with %d trips for %d persons " %
        (len(trips_df), len(persons_df)))

    locals_dict = config.get_model_constants(physical_activity_settings)
    locals_dict.update(config.setting('globals'))

    trip_trace_rows = trace_hh_id and trips_df.household_id == trace_hh_id

    rows_per_chunk, effective_chunk_size = \
        physical_activity_rpc(chunk_size, trips_df, persons_df,
                              physical_activity_trip_spec, trace_label)

    logger.info("physical_activity_processor chunk_size %s rows_per_chunk %s" %
                (chunk_size, rows_per_chunk))

    coc_summary = None
    result_list = []

    # iterate over trips df chunked by hh_id
    for i, num_chunks, trips_chunk, trace_rows_chunk \
            in bca.chunked_df_by_chunk_id(trips_df, trip_trace_rows, rows_per_chunk):

        logger.info("%s chunk %s of %s" % (trace_label, i, num_chunks))

        trip_activity, trip_trace_results, trip_trace_assigned_locals = \
            assign.assign_variables(physical_activity_trip_spec,
                                    trips_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='trips',
                                    trace_rows=trace_rows_chunk)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks

        if trip_trace_results is not None:
            tracing.write_csv(trip_trace_results,
                              file_name="physical_activity_trips",
                              index_label='trip_id',
                              column_labels=['label', 'trip'])

            if trip_trace_assigned_locals:
                tracing.write_csv(trip_trace_assigned_locals,
                                  file_name="physical_activity_trips_locals")

        # sum trip activity for each unique person
        trip_activity = trip_activity.groupby(trips_chunk.person_id).sum()

        # merge in persons columns for this chunk
        persons_chunk = pd.merge(trip_activity, persons_df,
                                 left_index=True, right_index=True)

        # trace rows array for this chunk
        person_trace_rows = trace_hh_id and persons_chunk['household_id'] == trace_hh_id

        person_activity, person_trace_results, person_trace_assigned_locals = \
            assign.assign_variables(physical_activity_person_spec,
                                    persons_chunk,
                                    locals_dict=locals_dict,
                                    df_alias='persons',
                                    trace_rows=person_trace_rows)

        # since tracing is at household level, trace_results will occur in only one chunk
        # we can just write them out when we see them without need to accumulate across chunks

        if person_trace_results is not None:
            tracing.write_csv(person_trace_results,
                              file_name="physical_activity_persons",
                              index_label='persons_merged_table_index',
                              column_labels=['label', 'person'])

            if person_trace_assigned_locals:
                tracing.write_csv(person_trace_assigned_locals,
                                  file_name="physical_activity_persons_locals")

        # concat in the coc columns and summarize the chunk by coc
        person_activity = pd.concat(
            [persons_chunk[coc_column_names], person_activity], axis=1)
        coc_summary = person_activity.groupby(coc_column_names).sum()

        result_list.append(coc_summary)

        # interpolate the label's value; the original used the literal text 'trace_label'
        chunk_trace_label = '%s chunk_%s' % (trace_label, i)
        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)
        chunk.log_df(chunk_trace_label, 'trips_chunk', trips_chunk)
        chunk.log_df(chunk_trace_label, 'persons_chunk', persons_chunk)
        chunk.log_close(chunk_trace_label)

    if len(result_list) > 1:

        # (if there was only one chunk, then concat is redundant)
        coc_summary = pd.concat(result_list)

        # squash the accumulated chunk summaries by reapplying group and sum
        coc_summary.reset_index(inplace=True)
        coc_summary = coc_summary.groupby(coc_column_names).sum()

    result_prefix = 'PA_'
    add_result_columns("coc_results", coc_summary, result_prefix)
    add_summary_results(coc_summary, prefix=result_prefix,
                        spec=physical_activity_person_spec)
def best_transit_path(set_random_seed, network_los, best_transit_path_spec):
    """
    Pick the highest-utility tap pair for a random sample of maz OD pairs

    Samples VECTOR_TEST_SIZE origin mazs and destination mazs (with replacement)
    plus a random 'AM'/'PM' time period for each, explodes them into candidate
    tap pairs, evaluates best_transit_path_spec against the candidates, and
    keeps the single row with the largest utility for each value of 'idx'
    (the column used to join candidates back to the sampled OD rows).
    The first sampled OD pair is used as the trace target.

    Parameters
    ----------
    set_random_seed :
        not referenced in the body - presumably requested only for its
        seeding side effect (TODO confirm against the injectable)
    network_los :
        object providing maz_df and get_tappairs_mazpairs; also exposed to the
        expressions as 'network_los'
    best_transit_path_spec :
        assignment expressions handed to assign.assign_variables

    Returns
    -------
    None
    """

    settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    # random draws happen in this order: origins, destinations, time periods
    sampled_od = pd.DataFrame({
        'omaz': network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index,
        'dmaz': network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index,
        'tod': np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE),
    })

    # trace the first sampled OD pair
    trace_od = (sampled_od.omaz[0], sampled_od.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s", *trace_od)

    # build exploded candidate table

    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    candidates = network_los.get_tappairs_mazpairs(
        sampled_od.omaz,
        sampled_od.dmaz,
        ofilter='drive_time',
        dfilter='walk_alightingActual')

    # add in tod column
    candidates = candidates.merge(
        right=sampled_od[['tod']],
        left_on='idx',
        right_index=True,
        how='left')

    od_count = len(sampled_od.index)
    candidate_count = len(candidates.index)
    logger.info("len od_df %s", od_count)
    logger.info("len atap_btap_df %s", candidate_count)
    logger.info("avg explosion %s", candidate_count / float(od_count))

    trace_oabd_rows = None
    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_oabd_rows = \
            (candidates.omaz == trace_orig) & (candidates.dmaz == trace_dest)

    # names visible to the spec expressions
    expression_locals = {'np': np, 'network_los': network_los}
    constants = config.get_model_constants(settings)
    if constants is not None:
        expression_locals.update(constants)

    results, trace_results, trace_assigned_locals = assign.assign_variables(
        best_transit_path_spec,
        candidates,
        expression_locals,
        trace_rows=trace_oabd_rows)

    # copy results
    for name in results.columns:
        candidates[name] = results[name]

    # drop rows if no utility
    rows_before = len(candidates.index)
    candidates = candidates.dropna(subset=['utility'])
    logger.info("Dropped %s of %s rows with null utility",
                rows_before - len(candidates.index), rows_before)

    # choose max utility: after an ascending sort the last row of each group wins
    ranked = candidates.sort_values(by='utility')
    candidates = ranked.groupby('idx').tail(1)

    if not trace_od:
        return

    if not trace_oabd_rows.any():
        logger.warning("trace_od not found origin = %s, dest = %s", trace_orig, trace_dest)
        return

    tracing.trace_df(candidates,
                     label='best_transit_path',
                     slicer='NONE',
                     transpose=False)

    tracing.trace_df(trace_results,
                     label='trace_best_transit_path',
                     slicer='NONE',
                     transpose=False)

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="trace_best_transit_path_locals")
def aggregate_od_processor(zone_districts, zones, data_dir, trace_od):
    """
    Evaluate the aggregate_od spec for every zone-to-zone pair and register summaries

    Builds an origin-destination table covering all zone pairs, evaluates the
    assignment expressions named in aggregate_od.yaml against it, and replaces
    two pipeline tables: 'aggregate_od_district_summary' (results summed by
    origin and destination district) and 'aggregate_od_zone_summary' (results
    summed by origin zone). The zone summary is also passed to
    add_aggregate_results.

    Parameters
    ----------
    zone_districts : table wrapper with to_frame()
        must share its index with zones and provide a 'district' column
    zones : table wrapper with to_frame()
    data_dir : passed through to create_skim_locals_dict
    trace_od : (orig, dest) pair, or a falsy value to disable tracing

    Returns
    -------
    None
    """

    trace_label = 'aggregate_od'
    logger.info("Running %s" % (trace_label, ))

    model_settings = config.read_model_settings('aggregate_od.yaml')

    spec_file_name = model_settings.get('spec_file_name', 'aggregate_od.csv')
    aggregate_od_spec = bca.read_assignment_spec(spec_file_name)

    zones = zones.to_frame()
    zone_districts = zone_districts.to_frame()
    zone_count = zone_districts.shape[0]

    assert zones.index.equals(zone_districts.index)

    # create OD dataframe in order compatible with ODSkims
    # (each origin repeated zone_count times, destinations cycling within it)
    zone_ids = np.asanyarray(zones.index)
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(zone_ids, zone_count),
            'dest': np.tile(zone_ids, zone_count),
        })

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(model_settings)
    locals_dict.update(config.setting('globals'))
    locals_dict['logger'] = logger

    logger.debug('%s mem before create_skim_locals_dict, %s' % (trace_label, memory_info(), ))

    # - add ODSkims to locals (note: we use local_skims later to close omx files)
    cache_skims = model_settings.get('cache_skims', False)
    local_skims = create_skim_locals_dict(model_settings, data_dir, zones, cache_skims)

    # everything between opening the skims and finishing the assignment runs
    # under try/finally so the skims are closed even if an expression fails
    try:
        locals_dict.update(local_skims)

        # - create_zone_matrices dicts
        locals_dict.update(create_zone_matrices(model_settings, zones))

        if trace_od:
            trace_orig, trace_dest = trace_od
            trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
        else:
            trace_od_rows = None

        logger.debug("%s assigning variables" % (trace_label, ))
        results, trace_results, trace_assigned_locals = \
            assign.assign_variables(aggregate_od_spec,
                                    od_df,
                                    locals_dict=locals_dict,
                                    df_alias='od',
                                    trace_rows=trace_od_rows)

        logger.debug('%s mem after assign_variables, %s' % (trace_label, memory_info(), ))
    finally:
        for local_name, od_skims in local_skims.items():
            logger.debug("closing %s" % local_name)
            od_skims.log_skim_usage()
            od_skims.close()

    # summarize aggregate_od_benefits by orig and dest districts
    # (orig/dest are temporarily overwritten with district ids, in od_df row order)
    logger.debug("%s district summary" % (trace_label, ))
    district_ids = np.asanyarray(zone_districts.district)
    results['orig'] = np.repeat(district_ids, zone_count)
    results['dest'] = np.tile(district_ids, zone_count)
    district_summary = results.groupby(['orig', 'dest']).sum()
    pipeline.replace_table('aggregate_od_district_summary', district_summary)

    # attribute aggregate_results benefits to origin zone
    logger.debug("%s zone summary" % (trace_label, ))
    results['orig'] = od_df['orig']
    del results['dest']
    zone_summary = results.groupby(['orig']).sum()
    pipeline.replace_table('aggregate_od_zone_summary', zone_summary)

    add_aggregate_results(zone_summary, aggregate_od_spec, source=trace_label)

    if trace_results is not None:
        tracing.write_csv(trace_results,
                          file_name=trace_label,
                          index_label='index',
                          column_labels=['label', 'od'])

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals,
                          file_name="%s_locals" % trace_label,
                          index_label='variable',
                          columns='value')
def compute_accessibility(settings, accessibility_spec,
                          accessibility_settings,
                          skim_dict, omx_file, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function. The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences. The decay function on the walk accessibility measure is
    steeper than automobile or transit. The minimum accessibility is zero.

    Each result column is reshaped to a (zone_count, zone_count) origin-by-destination
    matrix, summed across destinations, transformed with log(sum + 1), and added as a
    column of the "accessibility" table via inject.add_column.

    Parameters
    ----------
    settings : not referenced in the body
    accessibility_spec : assignment expressions handed to assign.assign_variables
    accessibility_settings : dict
        read for model constants and for 'land_use_columns' (default [])
    skim_dict, omx_file : passed to AccessibilitySkims
    land_use : table wrapper providing to_frame() and index
    trace_od : (orig, dest) pair, or a falsy value to disable tracing

    Returns
    -------
    None
    """

    logger.info("Running compute_accessibility")

    constants = config.get_model_constants(accessibility_settings)
    land_use_columns = accessibility_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    zone_count = len(land_use_df.index)

    # create OD dataframe
    # (each origin repeated zone_count times, destinations cycling within it)
    zone_ids = np.asanyarray(land_use_df.index)
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(zone_ids, zone_count),
            'dest': np.tile(zone_ids, zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df (attributes of the destination zone);
    # sort_index restores the original row order after the merge
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, omx_file, zone_count),
        'skim_do': AccessibilitySkims(skim_dict, omx_file, zone_count, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(accessibility_spec, od_df, locals_d, trace_rows=trace_od_rows)

    accessibility_df = pd.DataFrame(index=land_use.index)
    for column in results.columns:
        # rows are origins and columns are destinations, matching od_df row order
        data = np.asanyarray(results[column]).reshape(zone_count, zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

        inject.add_column("accessibility", column, accessibility_df[column])

    if trace_od:

        if not trace_od_rows.any():
            # logger.warn is a deprecated alias of logger.warning
            logger.warning("trace_od not found origin = %s, dest = %s", trace_orig, trace_dest)
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            # note that this is not the same as the orca-injected accessibility table
            # FIXME - should we name this differently and also dump the updated accessibility table?
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
def compute_columns(df, model_settings, configs_dir, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        table the expressions are evaluated against; also exposed to the
        expressions under the alias named by DF in model_settings
    model_settings : dict or str
        dict with keys:
            DF - local alias for df
            SPEC - name of expressions file (csv suffix optional)
                   if different from model_settings
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str:
            name of yaml file in configs_dir to load dict from
    configs_dir : str
        directory holding the settings yaml and the expressions csv
    trace_label : str or None
        label for trace output; defaults to the spec name (before any '.csv' is appended)

    Returns
    -------
    results: pandas.DataFrame
        one column for each expression (except temps with ALL_CAP target names)
        same index as df
    """

    if not isinstance(model_settings, str):
        settings_name = 'dict'
    else:
        settings_name = model_settings
        model_settings = config.read_model_settings(configs_dir, '%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % settings_name

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % settings_name

    df_alias = model_settings.get('DF')
    extra_table_names = model_settings.get('TABLES', [])
    # SPEC falls back to the settings name when it is not given
    spec_name = model_settings.get('SPEC', settings_name)

    assert spec_name is not None, \
        "Expected to find 'SPEC' in %s" % settings_name

    if trace_label is None:
        trace_label = spec_name

    spec_file_name = spec_name if spec_name.endswith(".csv") else '%s.csv' % spec_name
    spec = assign.read_assignment_spec(os.path.join(configs_dir, spec_file_name))

    # load the helper tables requested in TABLES
    tables = {}
    for table_name in extra_table_names:
        tables[table_name] = inject.get_table(table_name).to_frame()

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_alias not in tables, "Did not expect to find df '%s' in TABLES" % df_alias
    tables[df_alias] = df

    expression_locals = local_utilities()
    expression_locals.update(tables)

    results, trace_results, trace_assigned_locals = assign.assign_variables(
        spec,
        df,
        expression_locals,
        trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results,
                         label=trace_label,
                         slicer='NONE',
                         warn_if_empty=True)

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="%s_locals" % trace_label)

    return results
def compute_columns(df, model_settings, locals_dict=None, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        table the expressions are evaluated against; exposed to the expressions
        both under the alias named by DF in model_settings and as 'df'
    model_settings : dict or str
        dict with keys:
            DF - local alias for df
            SPEC - name of expressions file (csv suffix optional)
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str:
            name of yaml file in configs_dir to load dict from
    locals_dict : dict or None
        dict of locals (e.g. utility functions) to add to the execution environment;
        None (the default) adds nothing. The dict passed in is not modified.
    trace_label : str or None
        prefix for the trace label; the spec name is appended to it

    Returns
    -------
    results: pandas.DataFrame
        one column for each expression (except temps with ALL_CAP target names)
        same index as df
    """

    if isinstance(model_settings, str):
        model_settings_name = model_settings
        model_settings = config.read_model_settings('%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % model_settings_name
    else:
        model_settings_name = 'dict'
        assert isinstance(model_settings, dict)

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % model_settings_name

    df_name = model_settings.get('DF')
    helper_table_names = model_settings.get('TABLES', [])
    expressions_spec_name = model_settings.get('SPEC', None)

    assert expressions_spec_name is not None, \
        "Expected to find 'SPEC' in %s" % model_settings_name

    # the trace label uses the spec name as given, before any '.csv' is appended
    trace_label = tracing.extend_trace_label(trace_label or '', expressions_spec_name)

    if not expressions_spec_name.endswith(".csv"):
        expressions_spec_name = '%s.csv' % expressions_spec_name

    logger.debug(
        f"{trace_label} compute_columns using expression spec file {expressions_spec_name}"
    )
    expressions_spec = assign.read_assignment_spec(
        config.config_file_path(expressions_spec_name))

    assert expressions_spec.shape[0] > 0, \
        "Expected to find some assignment expressions in %s" % expressions_spec_name

    tables = {t: inject.get_table(t).to_frame() for t in helper_table_names}

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_name not in tables, "Did not expect to find df '%s' in TABLES" % df_name
    tables[df_name] = df

    # be nice and also give it to them as df?
    tables['df'] = df

    # later updates take precedence: utilities < caller locals < tables < skim_dict
    _locals_dict = assign.local_utilities()
    if locals_dict is not None:
        _locals_dict.update(locals_dict)
    _locals_dict.update(tables)

    # FIXME a number of asim model preprocessors want skim_dict
    # - should they request it in model_settings.TABLES?
    _locals_dict.update({
        # 'los': inject.get_injectable('network_los', None),
        'skim_dict': inject.get_injectable('skim_dict', None),
    })

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(expressions_spec,
                                  df,
                                  _locals_dict,
                                  trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results,
                         label=trace_label,
                         slicer='NONE')

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="%s_locals" % trace_label)

    return results