def compute_utilities_for_attribute_tuple(network_los, scalar_attributes, data, chunk_size, trace_label):
    """
    Compute utilities for one attribute combination and store them in data.

    The od dataframe built for this attribute combination is processed in
    adaptive chunks. The utilities computed for each chunk are written into
    the rows of the reshaped data array addressed by the chunk's index values.

    Parameters
    ----------
    network_los : object
        provides setting() lookups and tvpb.uid_calculator
    scalar_attributes : dict
        attribute name/value pairs for this combination
        (e.g. {'demographic_segment': 0, 'tod': 'AM', 'access_mode': 'walk'})
    data : array-like with reshape()
        reshaped to uid_calculator.fully_populated_shape and filled in by row
        NOTE(review): the caller only sees the results if reshape returns a
        view onto the caller's buffer - confirm
    chunk_size : int
        passed through to chunk.adaptive_chunked_choosers
    trace_label : str
        label used for logging and for chunk.log_df bookkeeping

    Returns
    -------
    None
    """

    logger.info(f"{trace_label} scalar_attributes: {scalar_attributes}")

    tvpb_uids = network_los.tvpb.uid_calculator

    column_attribute_names = network_los.setting(
        'TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])
    tap_tap_settings = network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')

    # update a copy so the object returned by setting() is left untouched
    constants = network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS').copy()
    constants.update(scalar_attributes)

    data = data.reshape(tvpb_uids.fully_populated_shape)

    # od skim_offset dataframe with uid index corresponding to scalar_attributes
    od_df = tvpb_uids.get_od_dataframe(scalar_attributes)

    # od_df is big and custom made for compute_utilities, but it is created outside of
    # adaptive_chunked_choosers (so it shows up in baseline) and needs no chunk_log entry
    assert not chunk.chunk_logging()  # otherwise we should chunk_log this

    # every attribute combination can share the same cached data for the row_size calc
    od_chunks = chunk.adaptive_chunked_choosers(od_df, chunk_size, trace_label, chunk_tag='initialize_tvpb')

    for _, od_chunk, _ in od_chunks:

        # work on a private copy of the chunk (copying would be wasteful if it were not a view)
        assert od_chunk._is_view
        od_chunk = od_chunk.copy()
        chunk.log_df(trace_label, 'attribute_chooser_chunk', od_chunk)

        # attributes listed in settings become columns (the rest are scalars in the constants)
        for name in column_attribute_names:
            od_chunk[name] = scalar_attributes[name]

        chunk.log_df(trace_label, 'attribute_chooser_chunk', od_chunk)

        chunk_utilities = pathbuilder.compute_utilities(
            network_los,
            model_settings=tap_tap_settings,
            choosers=od_chunk,
            model_constants=constants,
            trace_label=trace_label)

        chunk.log_df(trace_label, 'utilities_df', chunk_utilities)

        assert len(chunk_utilities) == len(od_chunk)
        assert len(chunk_utilities.columns) == data.shape[1]
        assert not any_uninitialized(chunk_utilities.values)

        data[od_chunk.index.values, :] = chunk_utilities.values

        # release the chunk copy and tell the chunk logger it is gone
        del od_chunk
        chunk.log_df(trace_label, 'attribute_chooser_chunk', None)

    logger.debug(f"{trace_label} updated utilities")
def compute_accessibility(land_use, accessibility, network_los, chunk_size, trace_od):
    """
    Compute accessibility for each zone in the land use file using the
    expressions from the accessibility spec.

    The actual results depend on the expressions in the spec, but this is
    initially intended to permit implementation of the mtc accessibility
    calculation as implemented by Accessibility.job

    Computes the measures of accessibility used by the automobile ownership
    model. An employment variable is multiplied by a mode-specific decay
    function, so that jobs count for less the farther away they are (in terms
    of round-trip travel time) from the location in question. The products to
    each destination zone are summed over each origin zone, and taking the
    logarithm mutes large differences. The decay function on the walk
    accessibility measure is steeper than automobile or transit. The minimum
    accessibility is zero.

    The accessibility table must be empty (no columns) on entry, otherwise a
    RuntimeError is raised. The computed table replaces the "accessibility"
    table in the pipeline; nothing is returned.
    """

    trace_label = 'compute_accessibility'

    model_settings = config.read_model_settings('accessibility.yaml')
    spec_path = config.config_file_path('accessibility.csv')
    assignment_spec = assign.read_assignment_spec(spec_path)

    accessibility_df = accessibility.to_frame()
    existing_columns = accessibility_df.columns
    if len(existing_columns) > 0:
        logger.warning(f"accessibility table is not empty. Columns:{list(existing_columns)}")
        raise RuntimeError("accessibility table is not empty.")

    constants = config.get_model_constants(model_settings)

    # keep only the land_use columns named by the land_use_columns model setting
    land_use_columns = model_settings.get('land_use_columns', [])
    land_use_df = land_use.to_frame()[land_use_columns]

    logger.info(f"Running {trace_label} with {len(accessibility_df.index)} orig zones {len(land_use_df)} dest zones")

    # one accessibilities result per chunk of origin zones
    zone_chunks = chunk.adaptive_chunked_choosers(accessibility_df, chunk_size, trace_label)
    accessibilities_list = [
        compute_accessibilities_for_zones(
            zone_chunk, land_use_df, assignment_spec, constants, network_los, trace_od, trace_label)
        for _, zone_chunk, _ in zone_chunks
    ]

    accessibility_df = pd.concat(accessibilities_list)

    logger.info(f"{trace_label} computed accessibilities {accessibility_df.shape}")

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
def compute_utilities_for_atttribute_tuple(network_los, scalar_attributes, data, chunk_size, trace_label):
    """
    Compute utilities for one attribute combination and store them in data.

    Variant that passes an explicit row_size to adaptive_chunked_choosers.
    The utilities computed for each chunk of the od dataframe are written
    into the rows of the reshaped data array addressed by the chunk's index
    values.

    Parameters
    ----------
    network_los : object
        provides setting() lookups and tvpb.uid_calculator
    scalar_attributes : dict
        attribute name/value pairs for this combination
        (e.g. {'demographic_segment': 0, 'tod': 'AM', 'access_mode': 'walk'})
    data : array-like with reshape()
        reshaped to uid_calculator.fully_populated_shape and filled in by row
        NOTE(review): the caller only sees the results if reshape returns a
        view onto the caller's buffer - confirm
    chunk_size : int
        when falsy, the row size calculation is skipped
    trace_label : str
        label used for logging and for chunk.log_df bookkeeping

    Returns
    -------
    None
    """

    logger.info(f"{trace_label} scalar_attributes: {scalar_attributes}")

    tvpb_uids = network_los.tvpb.uid_calculator

    column_attribute_names = network_los.setting(
        'TVPB_SETTINGS.tour_mode_choice.tap_tap_settings.attributes_as_columns', [])
    tap_tap_settings = network_los.setting('TVPB_SETTINGS.tour_mode_choice.tap_tap_settings')

    # update a copy so the object returned by setting() is left untouched
    constants = network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS').copy()
    constants.update(scalar_attributes)

    data = data.reshape(tvpb_uids.fully_populated_shape)

    # od skim_offset dataframe with uid index corresponding to scalar_attributes
    od_df = tvpb_uids.get_od_dataframe(scalar_attributes)

    # only bother calculating a row size when chunking is enabled
    if chunk_size:
        row_size = initialize_tvpb_calc_row_size(od_df, network_los, trace_label)
    else:
        row_size = chunk_size

    for _, od_chunk, _ in chunk.adaptive_chunked_choosers(od_df, chunk_size, row_size, trace_label):

        # od_df counts as chunk overhead since it is big and custom made for compute_utilities
        # (log_df is called inside the yield loop so adaptive_chunked_choosers chunk_log sees it)
        chunk.log_df(trace_label, 'choosers_df', od_df)

        # attributes listed in settings become columns (the rest are scalars in the constants)
        for name in column_attribute_names:
            od_chunk[name] = scalar_attributes[name]

        chunk.log_df(trace_label, 'chooser_chunk', od_chunk)

        chunk_utilities = pathbuilder.compute_utilities(
            network_los,
            model_settings=tap_tap_settings,
            choosers=od_chunk,
            model_constants=constants,
            trace_label=trace_label)

        chunk.log_df(trace_label, 'utilities_df', chunk_utilities)

        assert len(chunk_utilities) == len(od_chunk)
        assert len(chunk_utilities.columns) == data.shape[1]
        assert not any_uninitialized(chunk_utilities.values)

        data[od_chunk.index.values, :] = chunk_utilities.values

    logger.debug(f"{trace_label} updated utilities")
def run_tour_scheduling_probabilistic(
        tours_df, scheduling_probs, probs_join_cols, depart_alt_base, chunk_size, trace_label, trace_hh_id):
    """Make probabilistic tour scheduling choices in chunks

    Parameters
    ----------
    tours_df : pandas.DataFrame
        table of tours
    scheduling_probs : pandas.DataFrame
        Probability lookup table for tour departure and return times
    probs_join_cols : str or list of strs
        Columns to use for merging probability lookup table with tours table
    depart_alt_base : int
        int to add to probs column index to get time period it represents.
        e.g. depart_alt_base = 5 means first column (column 0) represents 5 am
    chunk_size : int
        size of chooser chunks, set in main settings.yaml
    trace_label : str
        label to append to tracing logs and table names
        (also passed to the chunker as its chunk tag)
    trace_hh_id : int
        households to trace

    Returns
    -------
    pandas.Series
        series of chosen alternative indices for each chooser
    """

    # trace_label doubles as the chunk tag
    tour_chunks = chunk.adaptive_chunked_choosers(tours_df, chunk_size, trace_label, trace_label)

    chunk_choices = [
        ps.make_scheduling_choices(
            tours_chunk, 'departure', scheduling_probs, probs_join_cols, depart_alt_base,
            first_trip_in_leg=False,
            report_failed_trips=True,
            trace_label=chunk_label,
            trace_hh_id=trace_hh_id,
            trace_choice_col_name='depart_return',
            clip_earliest_latest=False)
        for _, tours_chunk, chunk_label in tour_chunks
    ]

    return pd.concat(chunk_choices)
def run_trip_purpose(trips_df, estimator, chunk_size, trace_hh_id, trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    Every trip gets a purpose:

    - the last outbound trip of a tour gets the tour's primary_purpose
    - the last inbound trip gets 'home' (or 'work' for atwork subtours)
    - each intermediate stop (any trip other than the last trip outbound or
      inbound) is assigned a purpose based on an observed frequency distribution

    The distribution should always be segmented by tour purpose and tour
    direction. By default it is also segmented by person type. The join
    columns can be overwritten using the "probs_join_cols" parameter in the
    model settings. The model will attempt to segment by trip depart time as
    well if necessary and depart time ranges are specified in the probability
    lookup table.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    # uniform across trip_purpose
    chunk_tag = 'trip_purpose'

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    probs_join_cols = model_settings.get('probs_join_cols', PROBS_JOIN_COLUMNS)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name), comment='#')

    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings, model_settings_file_name)

    is_last_trip = trips_df.trip_num == trips_df.trip_count
    is_outbound = trips_df.outbound

    # - last trip of outbound tour gets primary_purpose
    last_outbound_purpose = trips_df.primary_purpose[is_last_trip & is_outbound]
    logger.info("assign purpose to %s last outbound trips", last_outbound_purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    last_inbound_tour_purpose = trips_df.primary_purpose[is_last_trip & ~is_outbound]
    last_inbound_purpose = pd.Series(
        np.where(last_inbound_tour_purpose == 'atwork', 'work', 'home'),
        index=last_inbound_tour_purpose.index)
    logger.info("assign purpose to %s last inbound trips", last_inbound_purpose.shape[0])

    purpose_parts = [last_outbound_purpose, last_inbound_purpose]

    # - intermediate stops (non-last trips) purpose assigned by probability table
    intermediate_trips = trips_df[~is_last_trip]
    logger.info("assign purpose to %s intermediate trips", intermediate_trips.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        expressions.assign_columns(
            df=intermediate_trips,
            model_settings=preprocessor_settings,
            locals_dict=config.get_model_constants(model_settings),
            trace_label=trace_label)

    use_depart_time = model_settings.get('use_depart_time', True)

    trip_chunks = chunk.adaptive_chunked_choosers(intermediate_trips, chunk_size, chunk_tag, trace_label)
    for _, trips_chunk, chunk_label in trip_chunks:
        chunk_purpose = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            estimator,
            probs_join_cols=probs_join_cols,
            use_depart_time=use_depart_time,
            trace_hh_id=trace_hh_id,
            trace_label=chunk_label)

        purpose_parts.append(chunk_purpose)

        chunk.log_df(trace_label, 'result_list', purpose_parts)

    # the two last-trip series are always present, so there is always something to concat
    return pd.concat(purpose_parts)
def run_trip_scheduling_choice(spec, tours, skims, locals_dict, chunk_size, trace_hh_id, trace_label):
    """
    Choose the split of each tour's duration between its outbound, main and inbound legs.

    Tours without intermediate stops get their whole duration assigned to the
    main leg. Tours with outbound and/or inbound stops are run, in chunks,
    through _interaction_sample_simulate against the schedule alternatives
    generated for them, and the chosen leg durations are copied back onto the
    tours table.

    Note that helper columns are assigned on the passed-in tours dataframe;
    the returned dataframe is the result of dropping TEMP_COLS from it.

    Parameters
    ----------
    spec : passed through to _interaction_sample_simulate
    tours : pandas.DataFrame
    skims : passed through to _interaction_sample_simulate
    locals_dict : passed through to _interaction_sample_simulate as locals_d
    chunk_size : int
    trace_hh_id : not used by this function
    trace_label : str

    Returns
    -------
    tours : pandas.DataFrame
        with MAIN_LEG_DURATION, OB_DURATION and IB_DURATION as int8
    """

    NUM_TOUR_LEGS = 3
    trace_label = tracing.extend_trace_label(trace_label, 'interaction_sample_simulate')

    # FIXME: The duration, start, and end should be ints well before we get here...
    tours[TOUR_DURATION_COLUMN] = tours[TOUR_DURATION_COLUMN].astype(np.int8)

    # Boolean columns that make it easier to identify
    # intermediate stops later in the model.
    has_ob = tours[NUM_OB_STOPS] >= 1
    has_ib = tours[NUM_IB_STOPS] >= 1
    tours[HAS_OB_STOPS] = has_ob
    tours[HAS_IB_STOPS] = has_ib

    any_stops = has_ob | has_ib
    no_stops = ~any_stops

    # Matrix of alternative sizes based on the total tour duration.
    # This is used to calculate chunk sizes.
    longest_duration = tours[TOUR_DURATION_COLUMN].max()
    alt_sizes = generate_alternative_sizes(longest_duration, NUM_TOUR_LEGS)

    # Number of tour leg schedule alternatives for each tour:
    # one by default, duration + 1 with stops in a single direction,
    # and a lookup in alt_sizes with stops in both directions
    tours[NUM_ALTERNATIVES] = 1
    tours.loc[has_ob != has_ib, NUM_ALTERNATIVES] = tours[TOUR_DURATION_COLUMN] + 1
    tours.loc[has_ob & has_ib, NUM_ALTERNATIVES] = \
        tours.apply(lambda tour: alt_sizes[1, tour.duration], axis=1)

    # With no intermediate stops on the tour, the main leg duration
    # equals the tour duration and the intermediate durations are zero
    tours.loc[no_stops, MAIN_LEG_DURATION] = tours[TOUR_DURATION_COLUMN]
    tours.loc[no_stops, [IB_DURATION, OB_DURATION]] = 0

    leg_duration_cols = [MAIN_LEG_DURATION, OB_DURATION, IB_DURATION]

    # Schedules only need to be determined for tours with intermediate stops
    indirect_tours = tours.loc[any_stops]

    if len(indirect_tours) > 0:

        result_list = []
        tour_chunks = chunk.adaptive_chunked_choosers(indirect_tours, chunk_size, trace_label)

        for _, tour_chunk, chunk_label in tour_chunks:

            # Sort the choosers and get the schedule alternatives
            tour_chunk = tour_chunk.sort_index()
            schedules = generate_schedule_alternatives(tour_chunk).sort_index()

            # Assuming we did the max_alt_size calculation correctly,
            # we should get the same sizes here.
            assert tour_chunk[NUM_ALTERNATIVES].sum() == schedules.shape[0]

            # Run the simulation
            chosen_ids = _interaction_sample_simulate(
                choosers=tour_chunk,
                alternatives=schedules,
                spec=spec,
                choice_column=SCHEDULE_ID,
                allow_zero_probs=True,
                zero_prob_choice_val=-999,
                log_alt_losers=False,
                want_logsums=False,
                skims=skims,
                locals_d=locals_dict,
                trace_label=chunk_label,
                trace_choice_name='trip_schedule_stage_1',
                estimator=None)

            assert len(chosen_ids.index) == len(tour_chunk.index)

            # keep the schedule rows whose id was chosen
            choices = schedules[schedules[SCHEDULE_ID].isin(chosen_ids)]

            result_list.append(choices)

            chunk.log_df(trace_label, 'result_list', result_list)

        # FIXME: this will require 2X RAM
        # if necessary, could append to hdf5 store on disk:
        # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
        if len(result_list) > 1:
            choices = pd.concat(result_list)

        assert len(choices.index) == len(indirect_tours.index)

        # The choices cover only the indirect tours, so the durations
        # need to be updated on the main tour dataframe.
        tours.update(choices[leg_duration_cols])

    # Cleanup data types and drop temporary columns
    tours[leg_duration_cols] = tours[leg_duration_cols].astype(np.int8)
    tours = tours.drop(columns=TEMP_COLS)

    return tours
def schedule_tours(
        tours, persons_merged, alts, spec, logsum_tour_purpose,
        model_settings, timetable, timetable_window_id_col,
        previous_tour, tour_owner_id_col,
        estimator,
        chunk_size, tour_trace_label):
    """
    chunking wrapper for _schedule_tours

    While interaction_sample_simulate provides chunking support, the merged tours, persons
    dataframe and the tdd_interaction_dataset are very big, so we want to create them inside
    the chunking loop to minimize memory footprint. So we implement the chunking loop here,
    and pass a chunk_size of 0 to interaction_sample_simulate to disable its chunking support.

    Parameters
    ----------
    tours : pandas.DataFrame
        tours to schedule; sorted by index here if not already monotonic increasing.
        There may be no more than one tour per timetable window per call: the index
        (when timetable_window_id_col is None) or the timetable_window_id_col column
        must not contain duplicates.
    persons_merged, alts, spec, timetable, previous_tour, tour_owner_id_col, estimator
        passed through unchanged to _schedule_tours
    logsum_tour_purpose
        passed to skims_for_logsums (when logsums are configured) and to _schedule_tours
    model_settings : dict-like
        skims are only built when it contains 'LOGSUM_SETTINGS'
    timetable_window_id_col : str or None
        column identifying the timetable window, or None to use the tours index
    chunk_size : int
        when falsy, the row size calculation is skipped
    tour_trace_label : str

    Returns
    -------
    choices
        the result of _schedule_tours for a single chunk, or the pd.concat of the
        per-chunk results when there is more than one chunk

    Raises
    ------
    AssertionError
        if a timetable window appears more than once, or if the number of choices
        differs from the number of tours
    """

    if not tours.index.is_monotonic_increasing:
        # fix: the message has a %s placeholder, so it needs the trace label as argument
        logger.info("schedule_tours %s tours not monotonic_increasing - sorting df", tour_trace_label)
        tours = tours.sort_index()

    logger.info("%s schedule_tours running %d tour choices", tour_trace_label, len(tours))

    # no more than one tour per timetable_window per call
    if timetable_window_id_col is None:
        assert not tours.index.duplicated().any()
    else:
        assert not tours[timetable_window_id_col].duplicated().any()

    if 'LOGSUM_SETTINGS' in model_settings:
        # we need skims to calculate tvpb skim overhead in 3_ZONE systems for use by calc_rows_per_chunk
        skims = skims_for_logsums(logsum_tour_purpose, model_settings, tour_trace_label)
    else:
        skims = None

    # only bother calculating a row size when chunking is enabled
    row_size = chunk_size and \
        tour_scheduling_calc_row_size(tours, persons_merged, alts, skims, spec, model_settings, tour_trace_label)

    result_list = []
    for i, chooser_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers(tours, chunk_size, row_size, tour_trace_label):

        choices = _schedule_tours(chooser_chunk, persons_merged,
                                  alts, spec, logsum_tour_purpose,
                                  model_settings, skims,
                                  timetable, timetable_window_id_col,
                                  previous_tour, tour_owner_id_col,
                                  estimator,
                                  tour_trace_label=chunk_trace_label)

        result_list.append(choices)

        mem.force_garbage_collect()

    # FIXME: this will require 2X RAM
    # if necessary, could append to hdf5 store on disk:
    # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
    # (with a single chunk, choices is already that chunk's result, so no concat is needed)
    if len(result_list) > 1:
        choices = pd.concat(result_list)

    # fix: compare the two lengths - the misplaced parenthesis used to take the length of an
    # elementwise comparison, which never verified that every tour received a choice
    assert len(choices.index) == len(tours.index)

    return choices
def run_trip_purpose(trips_df, chunk_size, trace_hh_id, trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    Every trip gets a purpose:

    - the last outbound trip of a tour gets the tour's primary_purpose
    - the last inbound trip gets 'Home' (or 'Work' for atwork subtours)
    - each intermediate stop (any trip other than the last trip outbound or
      inbound) is assigned a purpose based on an observed frequency distribution

    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    model_settings = config.read_model_settings('trip_purpose.yaml')
    probs_spec = trip_purpose_probs()

    is_last_trip = trips_df.trip_num == trips_df.trip_count
    is_outbound = trips_df.outbound

    # - last trip of outbound tour gets primary_purpose
    last_outbound_purpose = trips_df.primary_purpose[is_last_trip & is_outbound]
    logger.info("assign purpose to %s last outbound trips", last_outbound_purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    last_inbound_tour_purpose = trips_df.primary_purpose[is_last_trip & ~is_outbound]
    last_inbound_purpose = pd.Series(
        np.where(last_inbound_tour_purpose == 'atwork', 'Work', 'Home'),
        index=last_inbound_tour_purpose.index)
    logger.info("assign purpose to %s last inbound trips", last_inbound_purpose.shape[0])

    purpose_parts = [last_outbound_purpose, last_inbound_purpose]

    # - intermediate stops (non-last trips) purpose assigned by probability table
    intermediate_trips = trips_df[~is_last_trip]
    logger.info("assign purpose to %s intermediate trips", intermediate_trips.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        expressions.assign_columns(
            df=intermediate_trips,
            model_settings=preprocessor_settings,
            locals_dict=config.get_model_constants(model_settings),
            trace_label=trace_label)

    # only bother calculating a row size when chunking is enabled
    if chunk_size:
        row_size = trip_purpose_calc_row_size(intermediate_trips, probs_spec, trace_label)
    else:
        row_size = chunk_size

    trip_chunks = chunk.adaptive_chunked_choosers(intermediate_trips, chunk_size, row_size, trace_label)
    purpose_parts.extend(
        choose_intermediate_trip_purpose(trips_chunk, probs_spec, trace_hh_id, trace_label=chunk_label)
        for _, trips_chunk, chunk_label in trip_chunks
    )

    # the two last-trip series are always present, so there is always something to concat
    return pd.concat(purpose_parts)
def run_trip_purpose(
        trips_df,
        estimator,
        chunk_size,
        trace_hh_id,
        trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    Every trip gets a purpose:

    - the last outbound trip of a tour gets the tour's primary_purpose
    - the last inbound trip gets 'Home' (or 'Work' for atwork subtours)
    - each intermediate stop (any trip other than the last trip outbound or
      inbound) is assigned a purpose based on an observed frequency distribution

    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    When an estimator is supplied, the spec and model settings are written to it.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name), comment='#')

    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings, model_settings_file_name)

    is_last_trip = trips_df.trip_num == trips_df.trip_count
    is_outbound = trips_df.outbound

    # - last trip of outbound tour gets primary_purpose
    last_outbound_purpose = trips_df.primary_purpose[is_last_trip & is_outbound]
    logger.info("assign purpose to %s last outbound trips", last_outbound_purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    # FIXME should be lower case for consistency?
    last_inbound_tour_purpose = trips_df.primary_purpose[is_last_trip & ~is_outbound]
    last_inbound_purpose = pd.Series(
        np.where(last_inbound_tour_purpose == 'atwork', 'Work', 'Home'),
        index=last_inbound_tour_purpose.index)
    logger.info("assign purpose to %s last inbound trips", last_inbound_purpose.shape[0])

    purpose_parts = [last_outbound_purpose, last_inbound_purpose]

    # - intermediate stops (non-last trips) purpose assigned by probability table
    intermediate_trips = trips_df[~is_last_trip]
    logger.info("assign purpose to %s intermediate trips", intermediate_trips.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        expressions.assign_columns(
            df=intermediate_trips,
            model_settings=preprocessor_settings,
            locals_dict=config.get_model_constants(model_settings),
            trace_label=trace_label)

    # only bother calculating a row size when chunking is enabled
    if chunk_size:
        row_size = trip_purpose_calc_row_size(intermediate_trips, probs_spec, trace_label)
    else:
        row_size = chunk_size

    trip_chunks = chunk.adaptive_chunked_choosers(intermediate_trips, chunk_size, row_size, trace_label)
    purpose_parts.extend(
        choose_intermediate_trip_purpose(
            trips_chunk, probs_spec, estimator, trace_hh_id, trace_label=chunk_label)
        for _, trips_chunk, chunk_label in trip_chunks
    )

    # the two last-trip series are always present, so there is always something to concat
    return pd.concat(purpose_parts)