def compute_accessibility(land_use, accessibility, network_los, chunk_size, trace_od):
    """
    Compute the accessibility table chunk by chunk and write it to the pipeline.

    The measures are whatever the expressions in accessibility.csv define. The
    spec was initially intended to reproduce the mtc accessibility calculation
    implemented by Accessibility.job, used by the automobile ownership model:
    an employment variable is multiplied by a mode-specific decay function
    (steeper for walk than for automobile or transit), the products to each
    destination zone are summed over each origin zone, and a logarithm mutes
    large differences. The minimum accessibility is zero.

    Parameters
    ----------
    land_use : table wrapper providing to_frame()
        destination zones; only the columns listed in the land_use_columns
        model setting are kept
    accessibility : table wrapper providing to_frame()
        origin zones; must not have any columns yet
    network_los
        passed through to compute_accessibilities_for_zones
    chunk_size
        passed to chunk.adaptive_chunked_choosers
    trace_od
        passed through to compute_accessibilities_for_zones

    Raises
    ------
    RuntimeError
        if the accessibility table already has columns
    """
    trace_label = 'compute_accessibility'

    settings = config.read_model_settings('accessibility.yaml')
    spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    # this step expects to populate an empty table, so refuse to overwrite columns
    existing_columns = list(accessibility_df.columns)
    if existing_columns:
        logger.warning(f"accessibility table is not empty. Columns:{existing_columns}")
        raise RuntimeError("accessibility table is not empty.")

    constants = config.get_model_constants(settings)

    # keep only the land_use columns the spec needs (land_use_columns model setting)
    wanted_columns = settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()[wanted_columns]

    logger.info(f"Running {trace_label} with {len(accessibility_df.index)} orig zones {len(land_use_df)} dest zones")

    # one result frame per chunk of origin zones
    chunk_results = [
        compute_accessibilities_for_zones(zone_chunk, land_use_df, spec,
                                          constants, network_los, trace_od, trace_label)
        for _, zone_chunk, _ in chunk.adaptive_chunked_choosers(accessibility_df, chunk_size, trace_label)
    ]

    accessibility_df = pd.concat(chunk_results)

    logger.info(f"{trace_label} computed accessibilities {accessibility_df.shape}")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
def compute_tap_tap_time(self, recipe, access_df, egress_df, chooser_attributes, trace_label, trace):
    """
    Evaluate the tap_tap assignment spec for all transit paths and keep the available ones.

    Parameters
    ----------
    recipe : str
        name used to look up TVPB_SETTINGS.<recipe>.CONSTANTS and
        TVPB_SETTINGS.<recipe>.tap_tap_settings in the network_los settings
    access_df, egress_df
        passed through to self.all_transit_paths
    chooser_attributes : pandas.DataFrame
        passed to self.all_transit_paths; its columns are dropped from the result
    trace_label : str
        extended with 'compute_tap_tap_time'
    trace : bool
        when truthy, the result is passed to self.trace_df

    Returns
    -------
    transit_df : pandas.DataFrame
        rows whose 'transit' value is greater than zero, without the
        chooser_attributes columns (index is arbitrary)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_tap_tap_time')

    model_constants = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.CONSTANTS')
    tap_tap_settings = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

    with memo("#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
        transit_df = self.all_transit_paths(access_df, egress_df, chooser_attributes, trace_label, trace)
        # note: transit_df index is arbitrary
    chunk.log_df(trace_label, "transit_df", transit_df)

    # expressions can refer to the network_los object as 'los' and to the recipe constants
    locals_d = {'los': self.network_los}
    locals_d.update(model_constants)

    assignment_spec = assign.read_assignment_spec(file_name=config.config_file_path(tap_tap_settings['SPEC']))

    results, _, _ = assign.assign_variables(assignment_spec, transit_df, locals_d)

    # the spec must produce exactly one result column, which becomes 'transit'
    # (the parenthesis used to be misplaced, so the column count was never checked)
    assert len(results.columns) == 1, \
        f"expected exactly one result column from tap_tap spec but got {list(results.columns)}"
    transit_df['transit'] = results

    # filter out unavailable btap_atap pairs
    logger.debug(f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}")
    transit_df = transit_df[transit_df.transit > 0]

    # reassign rather than drop in place: transit_df is now a filtered selection
    transit_df = transit_df.drop(columns=chooser_attributes.columns)

    chunk.log_df(trace_label, "transit_df", None)

    if trace:
        self.trace_df(transit_df, trace_label, 'transit_df')

    return transit_df
def initialize_tvpb_calc_row_size(choosers, network_los, trace_label):
    """
    Estimate the row size used to chunk the tour_mode_choice TVPB tap_tap calculation.

    Element counts are registered with a chunk.RowSizeEstimator for the chooser
    columns, the attributes_as_columns setting, the preprocessor spec rows (only
    when a PREPROCESSOR is configured), and the rows and columns of the model spec.

    Parameters
    ----------
    choosers : pandas.DataFrame
        only the number of columns is used
    network_los
        object whose setting() method provides
        TVPB_SETTINGS.tour_mode_choice.tap_tap_settings
    trace_label : str
        passed to chunk.RowSizeEstimator

    Returns
    -------
    row_size
        whatever sizer.get_hwm() returns (presumably the high-water mark of the
        registered elements - confirm against chunk.RowSizeEstimator)
    """
    sizer = chunk.RowSizeEstimator(trace_label)

    model_settings = network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')
    attributes_as_columns = \
        network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])

    # one element per chooser column
    sizer.add_elements(len(choosers.columns), 'choosers')

    # one element per attribute added as a column
    sizer.add_elements(len(attributes_as_columns), 'attributes_as_columns')

    preprocessor_settings = model_settings.get('PREPROCESSOR')
    if preprocessor_settings:

        preprocessor_spec_name = preprocessor_settings.get('SPEC', None)
        # fail with a readable message instead of AttributeError on None.endswith
        assert preprocessor_spec_name is not None, \
            "Expected to find 'SPEC' in PREPROCESSOR settings of TVPB_SETTINGS.tour_mode_choice.tap_tap_settings"

        if not preprocessor_spec_name.endswith(".csv"):
            preprocessor_spec_name = f'{preprocessor_spec_name}.csv'
        expressions_spec = assign.read_assignment_spec(config.config_file_path(preprocessor_spec_name))

        # one element per preprocessor expression
        sizer.add_elements(expressions_spec.shape[0], 'preprocessor')

    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])

    # expression_values for each spec row
    sizer.add_elements(spec.shape[0], 'expression_values')

    # utilities for each spec column
    sizer.add_elements(spec.shape[1], 'utilities')

    row_size = sizer.get_hwm()

    return row_size
def compute_columns(df, model_settings, configs_dir, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        table the expressions are evaluated against; also made available to the
        expressions under the alias given by DF
        NOTE(review): earlier documentation said df may be None, with the table
        then loaded from the pipeline by its DF name - nothing in this function
        performs that load, so pass the frame explicitly.
    model_settings : dict or str
        dict with keys:
            DF - df_alias
            SPEC - name of expressions file (csv suffix optional); required when
                   model_settings is a dict, defaults to the settings name when
                   model_settings is a str
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str:
            name of yaml file (without suffix) in configs_dir to load dict from
    configs_dir : str
        directory holding the settings yaml file and the expressions csv file
    trace_label : str or None
        label for trace output; defaults to the SPEC name (without added csv suffix)

    Returns
    -------
    results: pandas.DataFrame
        first element returned by assign.assign_variables - documented as one
        column for each expression (except temps with ALL_CAP target names),
        same index as df
    """
    if isinstance(model_settings, str):
        model_settings_name = model_settings
        model_settings = config.read_model_settings(configs_dir, '%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % model_settings_name
        # the spec file shares the settings file name unless SPEC says otherwise
        default_spec_name = model_settings_name
    else:
        # 'dict' is only a label for messages, it must not be used as a spec file name
        model_settings_name = 'dict'
        default_spec_name = None

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % model_settings_name

    df_name = model_settings.get('DF')
    helper_table_names = model_settings.get('TABLES', [])
    expressions_spec_name = model_settings.get('SPEC', default_spec_name)

    assert expressions_spec_name is not None, \
        "Expected to find 'SPEC' in %s" % model_settings_name

    if trace_label is None:
        trace_label = expressions_spec_name

    if not expressions_spec_name.endswith(".csv"):
        expressions_spec_name = '%s.csv' % expressions_spec_name
    expressions_spec = assign.read_assignment_spec(os.path.join(configs_dir, expressions_spec_name))

    tables = {t: inject.get_table(t).to_frame() for t in helper_table_names}

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_name not in tables, "Did not expect to find df '%s' in TABLES" % df_name
    tables[df_name] = df

    locals_dict = local_utilities()
    locals_dict.update(tables)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(expressions_spec,
                                  df,
                                  locals_dict,
                                  trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results,
                         label=trace_label,
                         slicer='NONE',
                         warn_if_empty=True)

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="%s_locals" % trace_label)

    return results
def accessibility_spec(configs_dir):
    """
    Read the accessibility assignment spec.

    Parameters
    ----------
    configs_dir : str
        directory containing accessibility.csv

    Returns
    -------
    whatever assign.read_assignment_spec returns for that file
    """
    return assign.read_assignment_spec(os.path.join(configs_dir, 'accessibility.csv'))
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.

    Parameters
    ----------
    accessibility : table wrapper providing to_frame()
        its index supplies the origin zones; one column per spec result is added
    skim_dict
        passed to AccessibilitySkims to build the 'skim_od' and 'skim_do' locals
    land_use : table wrapper providing to_frame()
        its index supplies the destination zones; the columns listed in the
        land_use_columns model setting are merged into the OD table
    trace_od : pair (orig, dest) or falsy
        OD pair whose rows are traced; falsy disables tracing

    Returns
    -------
    None - the "accessibility" pipeline table is replaced
    """
    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe: every origin paired with every destination, origin-major order
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df (sort_index restores the origin-major row order)
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        # reshape (rather than assigning to .shape) so the array backing results is left untouched
        data = np.asanyarray(results[column]).reshape(orig_zone_count, dest_zone_count)
        # sum over destinations of each origin; the +1 keeps the logarithm at zero or above
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
def best_transit_path_spec():
    """Read the best_transit_path.csv assignment spec located via config.config_file_path."""
    spec_path = config.config_file_path('best_transit_path.csv')
    return assign.read_assignment_spec(spec_path)
def compute_accessibility(accessibility, network_los, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.

    Parameters
    ----------
    accessibility : table wrapper providing to_frame()
        its index supplies the origin zones; one column per spec result is added
    network_los
        supplies the default skim dict and is exposed to expressions as 'network_los';
        when its zone_system is los.THREE_ZONE a TransitVirtualPathBuilder is exposed as 'tvpb'
    land_use : table wrapper providing to_frame()
        its index supplies the destination zones; only the columns listed in the
        land_use_columns model setting are used
    trace_od : pair (orig, dest) or falsy
        OD pair whose rows are traced; falsy disables tracing

    Returns
    -------
    None - the "accessibility" pipeline table is replaced
    """
    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    constants = config.get_model_constants(model_settings)

    land_use_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()
    land_use_df = land_use_df[land_use_columns]

    # don't assume they are the same: accessibility may be sliced if we are multiprocessing
    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe: every origin paired with every destination, origin-major order
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(orig_zones, dest_zone_count),
            'dest': np.tile(dest_zones, orig_zone_count)
        })

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df (sort_index restores the origin-major row order)
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'network_los': network_los,
    }

    # skim wrappers keyed on the od_df columns, in both directions
    skim_dict = network_los.get_default_skim_dict()
    locals_d['skim_od'] = skim_dict.wrap('orig', 'dest').set_df(od_df)
    locals_d['skim_do'] = skim_dict.wrap('dest', 'orig').set_df(od_df)

    if network_los.zone_system == los.THREE_ZONE:
        locals_d['tvpb'] = TransitVirtualPathBuilder(network_los)

    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        # reshape (rather than assigning to .shape) so the array backing results is left untouched
        data = np.asanyarray(results[column]).reshape(orig_zone_count, dest_zone_count)  # (o,d)
        # sum over destinations of each origin; the +1 keeps the logarithm at zero or above
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    logger.info(f"{trace_label} added {len(results.columns)} columns")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning(f"trace_od not found origin = {trace_orig}, dest = {trace_dest}")
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
def compute_columns(df, model_settings, locals_dict={}, trace_label=None):
    """
    Run the assignment expressions named by model_settings against df.

    Parameters
    ----------
    df : pandas DataFrame
        table the expressions are evaluated against; exposed to the expressions
        both under the DF alias and as 'df'
    model_settings : dict or str
        dict with keys:
            DF - alias under which df is made available to the expressions (required)
            SPEC - name of expressions file, csv suffix optional (required)
            TABLES - list of pipeline tables to load and make available as locals
        str:
            name of yaml settings file (without suffix) to load that dict from
    locals_dict : dict
        dict of locals (e.g. utility functions) to add to the execution environment
    trace_label : str or None
        prefix of the trace label, which is extended with the SPEC name

    Returns
    -------
    results: pandas.DataFrame
        first element returned by assign.assign_variables - documented as one
        column for each expression (except temps with ALL_CAP target names),
        same index as df
    """
    if isinstance(model_settings, str):
        settings_label = model_settings
        model_settings = config.read_model_settings('%s.yaml' % settings_label)
        assert model_settings, "Found no model settings for %s" % settings_label
    else:
        settings_label = 'dict'
        assert isinstance(model_settings, dict)

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % settings_label

    df_alias = model_settings.get('DF')
    table_names = model_settings.get('TABLES', [])
    spec_name = model_settings.get('SPEC', None)

    assert spec_name is not None, \
        "Expected to find 'SPEC' in %s" % settings_label

    # the trace label uses the spec name as configured, before any suffix is added
    trace_label = tracing.extend_trace_label(trace_label or '', spec_name)

    spec_file_name = spec_name if spec_name.endswith(".csv") else '%s.csv' % spec_name

    logger.debug(f"{trace_label} compute_columns using expression spec file {spec_file_name}")
    spec = assign.read_assignment_spec(config.config_file_path(spec_file_name))

    assert spec.shape[0] > 0, \
        "Expected to find some assignment expressions in %s" % spec_file_name

    # load the helper tables requested by TABLES
    tables = {}
    for table_name in table_names:
        tables[table_name] = inject.get_table(table_name).to_frame()

    # df may be a slice of (or entirely unrelated to) any pipeline table; DF is only its alias
    assert df_alias not in tables, "Did not expect to find df '%s' in TABLES" % df_alias
    tables[df_alias] = df

    # also expose it under the generic name 'df'
    tables['df'] = df

    # later updates win: utilities < caller locals < tables < skim_dict
    context = assign.local_utilities()
    context.update(locals_dict)
    context.update(tables)

    # FIXME a number of asim model preprocessors want skim_dict - should they request it in model_settings.TABLES?
    context.update(skim_dict=inject.get_injectable('skim_dict', None))

    results, trace_results, trace_assigned_locals = assign.assign_variables(
        spec,
        df,
        context,
        trace_rows=tracing.trace_targets(df),
    )

    if trace_results is not None:
        tracing.trace_df(trace_results, label=trace_label, slicer='NONE')

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="%s_locals" % trace_label)

    return results
def compute_tap_tap_time(self, recipe, access_df, egress_df, chooser_attributes, path_info, trace_label, trace):
    """
    Evaluate the tap_tap assignment spec for all transit paths and keep the available ones.

    The spec is evaluated once per distinct (btap, atap) pair and the result is
    then mapped back onto every row of the full transit path table.

    Parameters
    ----------
    recipe : str
        name used to look up TVPB_SETTINGS.<recipe>.CONSTANTS and
        TVPB_SETTINGS.<recipe>.tap_tap_settings in the network_los settings
    access_df, egress_df
        passed through to self.all_transit_paths
    chooser_attributes : pandas.DataFrame
        passed to self.all_transit_paths; its columns are dropped from the result
    path_info : dict
        copied to seed the locals available to the spec expressions
    trace_label : str
        extended with 'compute_tap_tap_time'
    trace : bool
        when truthy, the result is passed to self.trace_df

    Returns
    -------
    transit_df : pandas.DataFrame
        rows whose 'transit' value is greater than zero, without the
        chooser_attributes columns (index is arbitrary)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_tap_tap_time')

    with chunk.chunk_log(trace_label):

        model_constants = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.CONSTANTS')
        tap_tap_settings = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.tap_tap_settings')

        with memo("#TVPB CACHE compute_tap_tap_utilities all_transit_paths"):
            transit_df = self.all_transit_paths(access_df, egress_df, chooser_attributes, trace_label, trace)
            # note: transit_df index is arbitrary
        chunk.log_df(trace_label, "transit_df", transit_df)

        # some expressions may want to know access mode -
        locals_dict = path_info.copy()
        locals_dict['los'] = self.network_los
        locals_dict.update(model_constants)

        assignment_spec = assign.read_assignment_spec(file_name=config.config_file_path(tap_tap_settings['SPEC']))

        DEDUPE = True
        if DEDUPE:

            # assign uid for reduping
            # (unique per (btap, atap) pair because atap < max_atap)
            max_atap = transit_df.atap.max() + 1
            transit_df['uid'] = transit_df.btap * max_atap + transit_df.atap

            # dedupe
            # NOTE(review): uid encodes only (btap, atap); rows that differ only in their
            # chooser attribute values collapse onto the first occurrence - confirm intended
            chooser_attribute_columns = list(chooser_attributes.columns)
            unique_transit_df = \
                transit_df.loc[~transit_df.uid.duplicated(), ['btap', 'atap', 'uid'] + chooser_attribute_columns]
            unique_transit_df.set_index('uid', inplace=True)
            chunk.log_df(trace_label, "unique_transit_df", unique_transit_df)

            logger.debug(f"#TVPB CACHE deduped transit_df from {len(transit_df)} to {len(unique_transit_df)}")

            # assign_variables
            results, _, _ = assign.assign_variables(assignment_spec, unique_transit_df, locals_dict)

            # the spec must produce exactly one result column, which becomes 'transit'
            # (the parenthesis used to be misplaced, so the column count was never checked)
            assert len(results.columns) == 1, \
                f"expected exactly one result column from tap_tap spec but got {list(results.columns)}"
            unique_transit_df['transit'] = results

            # redupe results back into transit_df
            with memo("#TVPB compute_tap_tap_time redupe transit_df"):
                transit_df['transit'] = reindex(unique_transit_df.transit, transit_df.uid)

            del transit_df['uid']
            del unique_transit_df
            chunk.log_df(trace_label, "transit_df", transit_df)
            chunk.log_df(trace_label, "unique_transit_df", None)

        else:
            results, _, _ = assign.assign_variables(assignment_spec, transit_df, locals_dict)
            assert len(results.columns) == 1, \
                f"expected exactly one result column from tap_tap spec but got {list(results.columns)}"
            transit_df['transit'] = results

        # filter out unavailable btap_atap pairs
        logger.debug(f"{(transit_df['transit'] <= 0).sum()} unavailable tap_tap pairs out of {len(transit_df)}")
        transit_df = transit_df[transit_df.transit > 0]

        # reassign rather than drop in place: transit_df is now a filtered selection
        transit_df = transit_df.drop(columns=chooser_attributes.columns)

        chunk.log_df(trace_label, "transit_df", None)

        if trace:
            self.trace_df(transit_df, trace_label, 'transit_df')

    return transit_df
def compute_maz_tap_utilities(self, recipe, maz_od_df, chooser_attributes, leg, mode, trace_label, trace):
    """
    Compute the maz-to-tap access or egress value for each chooser and reachable tap.

    Parameters
    ----------
    recipe : str
        name used to look up TVPB_SETTINGS.<recipe>.maz_tap_settings.<mode> and
        TVPB_SETTINGS.<recipe>.CONSTANTS in the network_los settings
    maz_od_df : pandas.DataFrame
        must provide an 'idx' column and the maz column of the leg
        ('omaz' for access, 'dmaz' otherwise)
    chooser_attributes : pandas.DataFrame or None
        supplemental chooser attributes (e.g. demographic_segment, tod) looked up
        by 'idx'; None means no supplemental attributes
    leg : str
        'access' selects the omaz/btap columns, any other value dmaz/atap;
        also the name of the result column
    mode : str
        key into self.network_los.maz_to_tap_dfs and the maz_tap_settings
    trace_label : str
        extended with 'maz_tap_utils.<leg>'
    trace : bool
        when truthy, the table is passed to self.trace_df before the
        computation columns are dropped

    Returns
    -------
    utilities_df : pandas.DataFrame
        merged maz/tap table with the result in column <leg>, after dropping the
        chooser attribute columns and the CHOOSER_COLUMNS of the settings
    """
    trace_label = tracing.extend_trace_label(trace_label, f'maz_tap_utils.{leg}')

    with chunk.chunk_log(trace_label):

        maz_tap_settings = \
            self.network_los.setting(f'TVPB_SETTINGS.{recipe}.maz_tap_settings.{mode}')
        chooser_columns = maz_tap_settings['CHOOSER_COLUMNS']
        attribute_columns = list(chooser_attributes.columns) if chooser_attributes is not None else []
        model_constants = self.network_los.setting(f'TVPB_SETTINGS.{recipe}.CONSTANTS')

        if leg == 'access':
            maz_col = 'omaz'
            tap_col = 'btap'
        else:
            maz_col = 'dmaz'
            tap_col = 'atap'

        # maz_to_tap access/egress utilities
        # deduped utilities_df - one row per chooser for each boarding tap (btap) accessible from omaz
        utilities_df = self.network_los.maz_to_tap_dfs[mode]

        # presumably the maz_to_tap index levels are named MAZ and TAP - verify against network_los
        utilities_df = utilities_df[chooser_columns]. \
            reset_index(drop=False). \
            rename(columns={'MAZ': maz_col, 'TAP': tap_col})
        utilities_df = pd.merge(
            maz_od_df[['idx', maz_col]].drop_duplicates(),
            utilities_df,
            on=maz_col, how='inner')

        # add any supplemental chooser attributes (e.g. demographic_segment, tod)
        for c in attribute_columns:
            utilities_df[c] = reindex(chooser_attributes[c], utilities_df['idx'])

        chunk.log_df(trace_label, "utilities_df", utilities_df)

        if self.units_for_recipe(recipe) == 'utility':

            utilities_df[leg] = compute_utilities(
                self.network_los,
                maz_tap_settings,
                utilities_df,
                model_constants=model_constants,
                trace_label=trace_label, trace=trace,
                trace_column_names=['idx', maz_col, tap_col] if trace else None)

            chunk.log_df(trace_label, "utilities_df", utilities_df)  # annotated

        else:

            assignment_spec = \
                assign.read_assignment_spec(file_name=config.config_file_path(maz_tap_settings['SPEC']))

            results, _, _ = assign.assign_variables(assignment_spec, utilities_df, model_constants)

            # the spec must produce exactly one result column, which becomes the leg column
            # (the parenthesis used to be misplaced, so the column count was never checked)
            assert len(results.columns) == 1, \
                f"expected exactly one result column from maz_tap spec but got {list(results.columns)}"
            utilities_df[leg] = results

            chunk.log_df(trace_label, "utilities_df", utilities_df)

        if trace:
            self.trace_df(utilities_df, trace_label, 'utilities_df')

        # drop utility computation columns ('tod', 'demographic_segment' and maz_to_tap_df time/distance columns)
        utilities_df.drop(columns=attribute_columns + chooser_columns, inplace=True)

    return utilities_df
def best_transit_path_spec():
    """
    Load the assignment spec stored in best_transit_path.csv.

    The file is resolved with config.config_file_path and parsed with
    assign.read_assignment_spec, whose result is returned unchanged.
    """
    file_path = config.config_file_path('best_transit_path.csv')
    spec = assign.read_assignment_spec(file_path)
    return spec
def best_transit_path_spec(configs_dir):
    """
    Read the best transit path assignment spec.

    Parameters
    ----------
    configs_dir : str
        directory containing best_transit_path.csv

    Returns
    -------
    whatever assign.read_assignment_spec returns for that file
    """
    return assign.read_assignment_spec(os.path.join(configs_dir, 'best_transit_path.csv'))