def stop_frequency_model( edb_directory="output/estimation_data_bundle/{name}/", return_data=False, ): data = stop_frequency_data( edb_directory=edb_directory, values_index_col="tour_id", ) models = [] for n in range(len(data.spec)): coefficients = data.coefficients # coef_template = data.coef_template # not used spec = data.spec[n] chooser_data = data.chooser_data[n] settings = data.settings alt_names = data.alt_names[n] alt_codes = data.alt_codes[n] from .general import clean_values chooser_data = clean_values( chooser_data, alt_names_to_codes=data.alt_names_to_codes[n], choice_code="override_choice_code", ) if settings.get('LOGIT_TYPE') == 'NL': tree = construct_nesting_tree(data.alt_names[n], settings["NESTS"]) m = Model(graph=tree) else: m = Model() m.utility_co = dict_of_linear_utility_from_spec( spec, "Label", dict(zip(alt_names, alt_codes)), ) apply_coefficients(coefficients, m) avail = True d = DataFrames( co=chooser_data, av=avail, alt_codes=alt_codes, alt_names=alt_names, ) m.dataservice = d m.choice_co_code = "override_choice_code" models.append(m) from larch.model.model_group import ModelGroup models = ModelGroup(models) if return_data: return ( models, data, ) return models
def simple_simulate_model( name, edb_directory="output/estimation_data_bundle/{name}/", return_data=False, choices=None, construct_avail=False, values_index_col="household_id", ): data = simple_simulate_data( name=name, edb_directory=edb_directory, values_index_col=values_index_col, ) coefficients = data.coefficients # coef_template = data.coef_template # not used spec = data.spec chooser_data = data.chooser_data settings = data.settings alt_names = data.alt_names alt_codes = data.alt_codes from .general import clean_values chooser_data = clean_values( chooser_data, alt_names_to_codes=choices or data.alt_names_to_codes, choice_code="override_choice_code", ) if settings.get('LOGIT_TYPE') == 'NL': tree = construct_nesting_tree(data.alt_names, settings["NESTS"]) m = Model(graph=tree) else: m = Model(alts=data.alt_codes_to_names) m.utility_co = dict_of_linear_utility_from_spec( spec, "Label", dict(zip(alt_names, alt_codes)), ) apply_coefficients(coefficients, m) if construct_avail: avail = construct_availability(m, chooser_data, data.alt_codes_to_names) else: avail = True d = DataFrames(co=chooser_data, av=avail, alt_codes=alt_codes, alt_names=alt_names, ) m.dataservice = d m.choice_co_code = "override_choice_code" if return_data: return ( m, Dict( edb_directory=data.edb_directory, chooser_data=chooser_data, coefficients=coefficients, spec=spec, alt_names=alt_names, alt_codes=alt_codes, settings=settings, ), ) return m
def schedule_choice_model( name, edb_directory="output/estimation_data_bundle/{name}/", coefficients_file="{name}_coefficients.csv", spec_file="{name}_SPEC.csv", alt_values_file="{name}_alternatives_combined.csv", chooser_file="{name}_choosers_combined.csv", settings_file="{name}_model_settings.yaml", return_data=False, ): model_selector = name.replace("_location", "") model_selector = model_selector.replace("_destination", "") model_selector = model_selector.replace("_subtour", "") model_selector = model_selector.replace("_tour", "") edb_directory = edb_directory.format(name=name) def _read_csv(filename, optional=False, **kwargs): filename = filename.format(name=name) try: return pd.read_csv(os.path.join(edb_directory, filename), **kwargs) except FileNotFoundError: if optional: return None else: raise settings_file = settings_file.format(name=name) with open(os.path.join(edb_directory, settings_file), "r") as yf: settings = yaml.load( yf, Loader=yaml.SafeLoader, ) try: coefficients = _read_csv( coefficients_file, index_col="coefficient_name", ) except FileNotFoundError: # possibly mis-named file is shown in settings coefficients_file = settings.get('COEFFICIENTS', coefficients_file) coefficients = _read_csv( coefficients_file, index_col="coefficient_name", ) spec = _read_csv(spec_file, comment='#') alt_values = _read_csv(alt_values_file) chooser_data = _read_csv(chooser_file) # remove temp rows from spec, ASim uses them to calculate the other values written # to the EDB, but they are not actually part of the utility function themselves. spec = spec.loc[~spec.Expression.str.startswith("_")].copy() include_settings = settings.get("include_settings") if include_settings: with open(os.path.join(edb_directory, include_settings), "r") as yf: more_settings = yaml.load( yf, Loader=yaml.SafeLoader, ) settings.update(more_settings) CHOOSER_SEGMENT_COLUMN_NAME = settings.get("CHOOSER_SEGMENT_COLUMN_NAME") SEGMENT_IDS = settings.get("SEGMENT_IDS") if SEGMENT_IDS is None: SEGMENTS = settings.get("SEGMENTS") if SEGMENTS is not None: SEGMENT_IDS = {i: i for i in SEGMENTS} if 'Label' in spec.columns: label_column_name = 'Label' elif 'Expression' in spec.columns: label_column_name = 'Expression' else: raise ValueError("cannot find Label or Expression in spec file") m = Model() if len(spec.columns) == 4 and ([c.lower() for c in spec.columns] == [ 'label', 'description', 'expression', 'coefficient' ]): m.utility_ca = linear_utility_from_spec( spec, x_col="Label", p_col=spec.columns[-1], ignore_x=("local_dist", ), ) elif len(spec.columns) == 4 \ and all(spec.columns[:3] == ['Label', 'Description', 'Expression']) \ and len(SEGMENT_IDS) == 1 \ and spec.columns[3] == list(SEGMENT_IDS.values())[0]: m.utility_ca = linear_utility_from_spec( spec, x_col="Label", p_col=spec.columns[-1], ignore_x=("local_dist", ), ) else: m.utility_ca = linear_utility_from_spec( spec, x_col=label_column_name, p_col=SEGMENT_IDS, ignore_x=("local_dist", ), segment_id=CHOOSER_SEGMENT_COLUMN_NAME, ) apply_coefficients(coefficients, m, minimum=-25, maximum=25) chooser_index_name = chooser_data.columns[0] x_co = chooser_data.set_index(chooser_index_name) alt_values.fillna(0, inplace=True) x_ca = cv_to_ca( alt_values.set_index([chooser_index_name, alt_values.columns[1]]), required_labels=spec[label_column_name], ) # if CHOOSER_SEGMENT_COLUMN_NAME is not None: # # label segments with names # SEGMENT_IDS_REVERSE = {v: k for k, v in SEGMENT_IDS.items()} # x_co["_segment_label"] = x_co[CHOOSER_SEGMENT_COLUMN_NAME].apply( # lambda x: SEGMENT_IDS_REVERSE[x] # ) # else: # x_co["_segment_label"] = size_spec.index[0] alt_codes = np.arange(len(x_ca.index.levels[1])) + 1 x_ca.index = x_ca.index.set_levels(alt_codes, 1) x_co["override_choice_plus1"] = x_co["override_choice"] + 1 x_co["model_choice_plus1"] = x_co["model_choice"] + 1 unavail_coefs = coefficients.query( "(constrain == 'T') & (value < -900)").index unavail_data = [i.data for i in m.utility_ca if i.param in unavail_coefs] if len(unavail_data): joint_unavail = "|".join(f"({i}>0)" for i in unavail_data) joint_avail = f"~({joint_unavail})" else: joint_avail = 1 d = DataFrames(co=x_co, ca=x_ca, av=joint_avail) m.dataservice = d m.choice_co_code = "override_choice_plus1" # m.choice_co_code = "model_choice_plus1" if return_data: return ( m, Dict( edb_directory=Path(edb_directory), alt_values=alt_values, chooser_data=chooser_data, coefficients=coefficients, spec=spec, model_selector=model_selector, joint_avail=joint_avail, ), ) return m
def auto_ownership_model( name="auto_ownership", edb_directory="output/estimation_data_bundle/{name}/", return_data=False, ): data = simple_simulate_data( name=name, edb_directory=edb_directory, values_index_col="household_id", ) coefficients = data.coefficients # coef_template = data.coef_template # not used spec = data.spec chooser_data = data.chooser_data settings = data.settings altnames = list(spec.columns[3:]) altcodes = range(len(altnames)) chooser_data = remove_apostrophes(chooser_data) chooser_data.fillna(0, inplace=True) # Remove choosers with invalid observed choice chooser_data = chooser_data[chooser_data["override_choice"] >= 0] m = Model() # One of the alternatives is coded as 0, so # we need to explicitly initialize the MNL nesting graph # and set to root_id to a value other than zero. m.initialize_graph(alternative_codes=altcodes, root_id=99) m.utility_co = dict_of_linear_utility_from_spec( spec, "Label", dict(zip(altnames, altcodes)), ) apply_coefficients(coefficients, m) d = DataFrames( co=chooser_data, av=True, alt_codes=altcodes, alt_names=altnames, ) m.dataservice = d m.choice_co_code = "override_choice" if return_data: return ( m, Dict( edb_directory=data.edb_directory, chooser_data=chooser_data, coefficients=coefficients, spec=spec, altnames=altnames, altcodes=altcodes, ), ) return m