def cdap_model(
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    interaction_coeffs_file="{name}_interaction_coefficients.csv",
    households_file="../../final_households.csv",
    persons_file="../../final_persons.csv",
    spec1_file="{name}_INDIV_AND_HHSIZE1_SPEC.csv",
    settings_file="{name}_model_settings.yaml",
    chooser_data_file="{name}_values_combined.csv",
    return_data=False,
):
    """
    Assemble a ModelGroup holding one Model for each of 1 through 5 persons.

    All file and directory arguments are forwarded unchanged to
    ``cdap_data`` (always with ``name="cdap"``), which supplies the
    households, person data, one-person spec, interaction coefficients and
    coefficients used here.

    Parameters
    ----------
    edb_directory, coefficients_file, interaction_coeffs_file,
    households_file, persons_file, spec1_file, settings_file,
    chooser_data_file : str
        Passed straight through to ``cdap_data``.
    return_data : bool, default False
        When true, also return the object produced by ``cdap_data``.

    Returns
    -------
    ModelGroup, or a ``(ModelGroup, data)`` tuple when `return_data` is set.
    """
    bundle = cdap_data(
        name="cdap",
        edb_directory=edb_directory,
        coefficients_file=coefficients_file,
        interaction_coeffs_file=interaction_coeffs_file,
        households_file=households_file,
        persons_file=persons_file,
        spec1_file=spec1_file,
        settings_file=settings_file,
        chooser_data_file=chooser_data_file,
    )
    hh_table = bundle.households
    person_values = bundle.person_data
    person_spec = bundle.spec1
    interactions = bundle.interaction_coef
    coefs = bundle.coefficients

    frames_by_size = cdap_dataframes(hh_table, person_values)
    models_by_size = {}

    # The one-person model only needs the per-person base utility.
    _logger.info("building for model 1")
    solo = Model(dataservice=frames_by_size[1])
    models_by_size[1] = solo
    cdap_base_utility_by_person(solo, n_persons=1, spec=person_spec)
    solo.choice_any = True
    solo.availability_any = True

    # Add cardinality into interaction_coef if not present
    # (this writes the column onto the object owned by `bundle`).
    if 'cardinality' not in interactions:
        ptypes = interactions['interaction_ptypes']
        interactions['cardinality'] = ptypes.str.len()

    # Multi-person models add interaction utilities on top of the base terms.
    for n_persons in range(2, 6):
        _logger.info(f"building for model {n_persons}")
        multi = Model(dataservice=frames_by_size[n_persons])
        models_by_size[n_persons] = multi
        alternatives = generate_alternatives(n_persons)
        cdap_base_utility_by_person(
            multi, n_persons, person_spec, alternatives, person_values.columns
        )
        cdap_interaction_utility(
            multi, n_persons, alternatives, interactions, coefs
        )
        multi.choice_any = True
        multi.availability_any = True

    group = ModelGroup(models_by_size.values())
    explicit_value_parameters(group)
    apply_coefficients(coefs, group)

    if not return_data:
        return group
    return group, bundle
def test_301():
    """Estimate the example-300 model and compare against stored results.

    Checks the final log likelihood, the ``carrier==2`` estimate and the
    log loss returned by ``maximize_loglike``.
    """
    model = Model(dataservice=example(300, 'd'))

    term_names = [
        "timeperiod==2",
        "timeperiod==3",
        "timeperiod==4",
        "timeperiod==5",
        "timeperiod==6",
        "timeperiod==7",
        "timeperiod==8",
        "timeperiod==9",
        "carrier==2",
        "carrier==3",
        "carrier==4",
        "carrier==5",
        "equipment==2",
        "fare_hy",
        "fare_ly",
        "elapsed_time",
        "nb_cnxs",
    ]
    # One PX term per name, summed into a single utility expression.
    model.utility_ca = sum(map(PX, term_names))
    model.choice_ca_var = 'choice'
    model.load_data()

    fit = model.maximize_loglike()

    assert fit.loglike == approx(-777770.0688722526)
    assert fit.x['carrier==2'] == approx(0.11720047917232307)
    assert fit.logloss == approx(3.306873650593341)
def nonmand_tour_freq_model(
    edb_directory="output/estimation_data_bundle/{name}/",
    return_data=False,
):
    """
    Build one Model per entry of the ``SPEC_SEGMENTS`` setting.

    Data comes from ``interaction_simulate_data`` with
    ``name="non_mandatory_tour_frequency"``.  The result of
    ``link_same_value_coefficients`` is stored on the data object as
    ``relabel_coef`` before the per-segment models are built.

    Parameters
    ----------
    edb_directory : str
        Forwarded to ``interaction_simulate_data``.
    return_data : bool, default False
        When true, also return the loaded data object.

    Returns
    -------
    dict mapping segment name to Model, or a ``(dict, data)`` tuple when
    `return_data` is set.
    """
    data = interaction_simulate_data(
        name="non_mandatory_tour_frequency",
        edb_directory=edb_directory,
    )

    segment_names = [
        segment["NAME"] for segment in data.settings["SPEC_SEGMENTS"]
    ]
    data.relabel_coef = link_same_value_coefficients(
        segment_names, data.coefficients, data.spec
    )

    # Read these only after the call above, exactly as the attributes are
    # left on `data` by it.
    spec = data.spec
    coefficients = data.coefficients
    choosers_by_segment = data.chooser_data
    alt_values_by_segment = data.alt_values
    alt_def = data.alt_def

    models = {}
    for segment_name in segment_names:
        model = Model()
        models[segment_name] = model

        # One of the alternatives is coded as 0, so
        # we need to explicitly initialize the MNL nesting graph
        # and set to root_id to a value other than zero.
        model.initialize_graph(alternative_codes=alt_def.index, root_id=9999)

        # Utility specifications
        model.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=segment_name,
        )
        apply_coefficients(coefficients[segment_name], model)
        model.choice_co_code = "override_choice"

        # Attach Data
        choosers = choosers_by_segment[segment_name].set_index("person_id")
        x_co = choosers.rename(columns={"TAZ": "HOMETAZ"})
        indexed_alts = alt_values_by_segment[segment_name].set_index(
            ["person_id", "variable"]
        )
        x_ca = cv_to_ca(indexed_alts)
        model.dataservice = DataFrames(
            co=x_co,
            ca=x_ca,
            av=~unavail(model, x_ca),
        )

    if not return_data:
        return models
    return models, data
def stop_frequency_model(
    edb_directory="output/estimation_data_bundle/{name}/",
    return_data=False,
):
    """
    Build a ModelGroup with one Model per entry of the stop-frequency spec.

    Data comes from ``stop_frequency_data`` (indexed on ``tour_id``).  Its
    ``spec``, ``chooser_data``, ``alt_names``, ``alt_codes`` and
    ``alt_names_to_codes`` attributes are read at the same position ``n``
    for each model; ``coefficients`` and ``settings`` are shared by all.

    Parameters
    ----------
    edb_directory : str
        Forwarded to ``stop_frequency_data``.
    return_data : bool, default False
        When true, also return the loaded data object.

    Returns
    -------
    ModelGroup, or a ``(ModelGroup, data)`` tuple when `return_data` is set.
    """
    # Function-scope imports, resolved once instead of on every loop pass.
    from larch.model.model_group import ModelGroup

    from .general import clean_values

    data = stop_frequency_data(
        edb_directory=edb_directory,
        values_index_col="tour_id",
    )

    # Neither of these varies across the per-position models below.
    coefficients = data.coefficients
    settings = data.settings
    use_nested_logit = settings.get('LOGIT_TYPE') == 'NL'

    models = []
    # Several parallel containers are read at the same position, so an
    # explicit index is kept rather than iterating over `data.spec` alone.
    for n in range(len(data.spec)):
        alt_names = data.alt_names[n]
        alt_codes = data.alt_codes[n]

        chooser_data = clean_values(
            data.chooser_data[n],
            alt_names_to_codes=data.alt_names_to_codes[n],
            choice_code="override_choice_code",
        )

        if use_nested_logit:
            tree = construct_nesting_tree(alt_names, settings["NESTS"])
            m = Model(graph=tree)
        else:
            m = Model()

        m.utility_co = dict_of_linear_utility_from_spec(
            data.spec[n],
            "Label",
            dict(zip(alt_names, alt_codes)),
        )
        apply_coefficients(coefficients, m)

        # Every alternative is flagged as available (av=True).
        m.dataservice = DataFrames(
            co=chooser_data,
            av=True,
            alt_codes=alt_codes,
            alt_names=alt_names,
        )
        m.choice_co_code = "override_choice_code"
        models.append(m)

    group = ModelGroup(models)

    if return_data:
        return (
            group,
            data,
        )
    return group
def test_utility_function_output():
    """Check the HTML rendered by ``Model.utility_functions``.

    A two-alternative model is given a shared ``utility_ca``, a separate
    ``utility_co`` for alternatives 1 and 2, and a ``quantity_ca``.  The
    rendered output (via ``tostring()``) is compared verbatim against the
    expected markup for both settings of ``resolve_parameters``.
    """
    k = Model()
    k.utility_ca = P.Aaa * X.Aaa + P.Bbb * X.Bbb + P.Ccc
    k.utility_co[1] = P.Dx1 + P.Dy1 * X.Yyy
    k.utility_co[2] = P.Dx2 + P.Dy2 * X.Yyy
    k.quantity_ca = P.Qaa * X.Aaa + P.Qbb * X.Bbb + P.Qcc
    # Only the utility parameters get explicit values; Qaa/Qbb/Qcc are not set here.
    k.set_values(Aaa=12, Bbb=20, Ccc=2, Dx1=0, Dy1=0.001, Dx2=0.33, Dy2=-0.002)
    # resolve_parameters=False: the expected markup shows parameter names in
    # the formula, with their values in the tooltip text.
    u1 = k.utility_functions(resolve_parameters=False)
    assert u1.tostring() == '<div><table class="floatinghead" style="margin-top:1px;"><thead>' \
                            '<tr><th>alt</th><th style="text-align:left;">formula</th></tr></thead>' \
                            '<tbody><tr><td>1</td><td style="text-align:left;"><div></div> + ' \
                            '<div class="tooltipped">P.Aaa<span class="tooltiptext">12</span></div> * ' \
                            '<div class="tooltipped">X.Aaa<span class="tooltiptext">This is Data</span>' \
                            '</div><br> + <div class="tooltipped">P.Bbb<span class="tooltiptext">20</span>' \
                            '</div> * <div class="tooltipped">X.Bbb<span class="tooltiptext">This is Data' \
                            '</span></div><br> + <div class="tooltipped">P.Ccc<span class="tooltiptext">2' \
                            '</span></div><br> + <div class="tooltipped">P.Dx1<span class="tooltiptext">0' \
                            '</span></div><br> + <div class="tooltipped">P.Dy1<span class="tooltiptext">0.001' \
                            '</span></div> * <div class="tooltipped">X.Yyy<span class="tooltiptext">' \
                            'This is Data</span></div><br> + log(<br>\xa0\xa0 + <span></span>exp(' \
                            '<div class="tooltipped">P.Qaa<span class="tooltiptext">exp(0) = 0</span></div>) ' \
                            '* <div class="tooltipped">X.Aaa<span class="tooltiptext">This is Data</span></div>' \
                            '<br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">P.Qbb<span class="tooltiptext">' \
                            'exp(0) = 0</span></div>) * <div class="tooltipped">X.Bbb<span class="tooltiptext">' \
                            'This is Data</span></div><br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">' \
                            'P.Qcc<span class="tooltiptext">exp(0) = 0</span></div>)<br>)</td></tr><tr><td>2</td>' \
                            '<td style="text-align:left;"><div></div> + <div class="tooltipped">P.Aaa' \
                            '<span class="tooltiptext">12</span></div> * <div class="tooltipped">X.Aaa' \
                            '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">' \
                            'P.Bbb<span class="tooltiptext">20</span></div> * <div class="tooltipped">X.Bbb' \
                            '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">' \
                            'P.Ccc<span class="tooltiptext">2</span></div><br> + <div class="tooltipped">P.Dx2' \
                            '<span class="tooltiptext">0.33</span></div><br> + <div class="tooltipped">P.Dy2' \
                            '<span class="tooltiptext">-0.002</span></div> * <div class="tooltipped">X.Yyy' \
                            '<span class="tooltiptext">This is Data</span></div><br> + log(<br>\xa0\xa0 + ' \
                            '<span></span>exp(<div class="tooltipped">P.Qaa<span class="tooltiptext">exp(0) = 0' \
                            '</span></div>) * <div class="tooltipped">X.Aaa<span class="tooltiptext">' \
                            'This is Data</span></div><br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">' \
                            'P.Qbb<span class="tooltiptext">exp(0) = 0</span></div>) * <div class="tooltipped">' \
                            'X.Bbb<span class="tooltiptext">This is Data</span></div><br>\xa0\xa0 + <span>' \
                            '</span>exp(<div class="tooltipped">P.Qcc<span class="tooltiptext">exp(0) = 0</span>' \
                            '</div>)<br>)</td></tr></tbody></table></div>'
    # resolve_parameters=True: the expected markup swaps the two, showing the
    # values in the formula and the parameter names in the tooltip text.
    u2 = k.utility_functions(resolve_parameters=True)
    assert u2.tostring() == '<div><table class="floatinghead" style="margin-top:1px;"><thead><tr><th>alt</th>' \
                            '<th style="text-align:left;">formula</th></tr></thead><tbody><tr><td>1</td>' \
                            '<td style="text-align:left;"><div></div> + <div class="tooltipped">12' \
                            '<span class="tooltiptext">P.Aaa</span></div> * <div class="tooltipped">' \
                            'X.Aaa<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">' \
                            '20<span class="tooltiptext">P.Bbb</span></div> * <div class="tooltipped">X.Bbb' \
                            '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">' \
                            '2<span class="tooltiptext">P.Ccc</span></div><br> + <div class="tooltipped">0' \
                            '<span class="tooltiptext">P.Dx1</span></div><br> + <div class="tooltipped">0.001' \
                            '<span class="tooltiptext">P.Dy1</span></div> * <div class="tooltipped">X.Yyy' \
                            '<span class="tooltiptext">This is Data</span></div><br> + log(<br>\xa0\xa0 + <span>' \
                            '</span>exp(<div class="tooltipped">0<span class="tooltiptext">exp(P.Qaa)</span></div>) ' \
                            '* <div class="tooltipped">X.Aaa<span class="tooltiptext">This is Data</span></div>' \
                            '<br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">0<span class="tooltiptext">' \
                            'exp(P.Qbb)</span></div>) * <div class="tooltipped">X.Bbb<span class="tooltiptext">' \
                            'This is Data</span></div><br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">0' \
                            '<span class="tooltiptext">exp(P.Qcc)</span></div>)<br>)</td></tr><tr><td>2</td>' \
                            '<td style="text-align:left;"><div></div> + <div class="tooltipped">12' \
                            '<span class="tooltiptext">P.Aaa</span></div> * <div class="tooltipped">X.Aaa' \
                            '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">' \
                            '20<span class="tooltiptext">P.Bbb</span></div> * <div class="tooltipped">X.Bbb' \
                            '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">2' \
                            '<span class="tooltiptext">P.Ccc</span></div><br> + <div class="tooltipped">0.33' \
                            '<span class="tooltiptext">P.Dx2</span></div><br> + <div class="tooltipped">-0.002' \
                            '<span class="tooltiptext">P.Dy2</span></div> * <div class="tooltipped">X.Yyy' \
                            '<span class="tooltiptext">This is Data</span></div><br> + log(<br>\xa0\xa0 + <span>' \
                            '</span>exp(<div class="tooltipped">0<span class="tooltiptext">exp(P.Qaa)</span></div>) ' \
                            '* <div class="tooltipped">X.Aaa<span class="tooltiptext">This is Data</span></div>' \
                            '<br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">0<span class="tooltiptext">' \
                            'exp(P.Qbb)</span></div>) * <div class="tooltipped">X.Bbb<span class="tooltiptext">' \
                            'This is Data</span></div><br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">' \
                            '0<span class="tooltiptext">exp(P.Qcc)</span></div>)<br>)</td></tr></tbody></table></div>'
def location_choice_model(
    name="workplace_location",
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    spec_file="{name}_SPEC.csv",
    size_spec_file="{name}_size_terms.csv",
    alt_values_file="{name}_alternatives_combined.csv",
    chooser_file="{name}_choosers_combined.csv",
    settings_file="{name}_model_settings.yaml",
    landuse_file="{name}_landuse.csv",
    return_data=False,
):
    """
    Build a location choice Model from files in an estimation data bundle.

    Reads the coefficients, spec, size terms, alternative values, choosers
    and land use CSV files plus the YAML settings file from `edb_directory`,
    assembles the chooser (co) and alternative (ca) tables, and sets the
    utility and size (quantity) functions on a new Model.

    Parameters
    ----------
    name : str
        Substituted for ``{name}`` in `edb_directory` and every file name.
        With the ``_location``, ``_destination``, ``_subtour`` and ``_tour``
        substrings removed it is also the default size term selector
        (``joint`` is mapped to ``non_mandatory``).
    edb_directory : str
        Directory template holding all of the files below.
    coefficients_file, spec_file, size_spec_file, alt_values_file,
    chooser_file, settings_file, landuse_file : str
        File name templates, resolved relative to `edb_directory`.
    return_data : bool, default False
        When true, also return a Dict of the intermediate tables.

    Returns
    -------
    Model, or a ``(Model, Dict)`` tuple when `return_data` is set.

    Raises
    ------
    ValueError
        If the spec has neither a ``Label`` nor an ``Expression`` column.
        NOTE(review): ``spec.Expression`` is accessed before that check, so
        a spec without an ``Expression`` column appears to fail earlier.
    """
    # Derive the default size-term selector from the model name.
    model_selector = name.replace("_location", "")
    model_selector = model_selector.replace("_destination", "")
    model_selector = model_selector.replace("_subtour", "")
    model_selector = model_selector.replace("_tour", "")
    if model_selector == 'joint':
        model_selector = 'non_mandatory'

    edb_directory = edb_directory.format(name=name)

    def _read_csv(filename, **kwargs):
        # Resolve the {name} placeholder, then read from the bundle directory.
        filename = filename.format(name=name)
        return pd.read_csv(os.path.join(edb_directory, filename), **kwargs)

    coefficients = _read_csv(
        coefficients_file,
        index_col="coefficient_name",
    )
    spec = _read_csv(spec_file, comment="#")
    alt_values = _read_csv(alt_values_file)
    chooser_data = _read_csv(chooser_file)
    landuse = _read_csv(landuse_file, index_col="zone_id")
    master_size_spec = _read_csv(size_spec_file)

    # remove temp rows from spec, ASim uses them to calculate the other values written
    # to the EDB, but they are not actually part of the utility function themselves.
    # Rows with a missing Expression are dropped first so that the string
    # test below only sees actual strings.
    spec = spec.loc[~spec.Expression.isna()]
    spec = spec.loc[~spec.Expression.str.startswith("_")].copy()

    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as yf:
        settings = yaml.load(
            yf,
            Loader=yaml.SafeLoader,
        )

    # Merge in a secondary settings file when one is named and present on disk;
    # a named-but-missing file is silently skipped.
    include_settings = settings.get("include_settings")
    if include_settings:
        include_settings = os.path.join(edb_directory, include_settings)
    if include_settings and os.path.exists(include_settings):
        with open(include_settings, "r") as yf:
            more_settings = yaml.load(
                yf,
                Loader=yaml.SafeLoader,
            )
        settings.update(more_settings)

    CHOOSER_SEGMENT_COLUMN_NAME = settings.get("CHOOSER_SEGMENT_COLUMN_NAME")
    SEGMENT_IDS = settings.get("SEGMENT_IDS")
    if SEGMENT_IDS is None:
        # Fall back to SEGMENTS, mapping each segment to itself.
        SEGMENTS = settings.get("SEGMENTS")
        if SEGMENTS is not None:
            SEGMENT_IDS = {i: i for i in SEGMENTS}

    SIZE_TERM_SELECTOR = settings.get('SIZE_TERM_SELECTOR', model_selector)

    # filter size spec for this location choice only
    size_spec = (master_size_spec.query(
        f"model_selector == '{SIZE_TERM_SELECTOR}'").drop(
            columns="model_selector").set_index("segment"))
    # keep only the columns whose maximum over the remaining segments is positive
    size_spec = size_spec.loc[:, size_spec.max() > 0]

    size_coef = size_coefficients_from_spec(size_spec)

    indexes_to_drop = [
        "util_size_variable",  # pre-computed size (will be re-estimated)
        "util_size_variable_atwork",  # pre-computed size (will be re-estimated)
        "util_utility_adjustment",  # shadow pricing (ignored in estimation)
        "@df['size_term'].apply(np.log1p)",  # pre-computed size (will be re-estimated)
    ]
    # Only drop the entries that are actually present in the spec, looked up
    # in the Label column when it exists and in Expression otherwise.
    if 'Label' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Label.to_numpy()
        ]
        label_column_name = 'Label'
    elif 'Expression' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Expression.to_numpy()
        ]
        label_column_name = 'Expression'
    else:
        raise ValueError("cannot find Label or Expression in spec file")

    # Without a Label column, generate labels from each expression's position.
    # The mapping is built before rows are dropped below.
    expression_labels = None
    if label_column_name == 'Expression':
        expression_labels = {
            expr: f"variable_label{n:04d}"
            for n, expr in enumerate(spec.Expression.to_numpy())
        }

    # Remove shadow pricing and pre-existing size expression for re-estimation
    spec = (spec.set_index(label_column_name).drop(
        index=indexes_to_drop).reset_index())

    if label_column_name == 'Expression':
        # Apply the generated labels to both the spec and the alternative values.
        spec.insert(0, "Label", spec['Expression'].map(expression_labels))
        alt_values['variable'] = alt_values['variable'].map(expression_labels)
        label_column_name = "Label"

    if name == 'trip_destination':
        # For this model the segment settings are overridden: segments are
        # taken from the spec columns after the first three.
        CHOOSER_SEGMENT_COLUMN_NAME = 'primary_purpose'
        primary_purposes = spec.columns[3:]
        SEGMENT_IDS = {pp: pp for pp in primary_purposes}

    # The first chooser column is used as the chooser index; the alternative
    # values are indexed by that same name plus their second column.
    chooser_index_name = chooser_data.columns[0]
    x_co = chooser_data.set_index(chooser_index_name)
    x_ca = cv_to_ca(
        alt_values.set_index([chooser_index_name, alt_values.columns[1]]))

    if CHOOSER_SEGMENT_COLUMN_NAME is not None:
        # label segments with names
        SEGMENT_IDS_REVERSE = {v: k for k, v in SEGMENT_IDS.items()}
        x_co["_segment_label"] = x_co[CHOOSER_SEGMENT_COLUMN_NAME].apply(
            lambda x: SEGMENT_IDS_REVERSE[x])
    else:
        # unsegmented: every chooser gets the first size-spec segment
        x_co["_segment_label"] = size_spec.index[0]

    # compute total size values by segment
    for segment in size_spec.index:
        total_size_segment = pd.Series(0, index=landuse.index)
        x_co["total_size_" + segment] = 0
        for land_use_field in size_spec.loc[segment].index:
            total_size_segment += (landuse[land_use_field] *
                                   size_spec.loc[segment, land_use_field])
        # look up the total size of each chooser's observed choice
        x_co["total_size_" + segment] = total_size_segment.loc[
            x_co["override_choice"]].to_numpy()

    # for each chooser, collate the appropriate total size value
    x_co["total_size_segment"] = 0
    for segment in size_spec.index:
        labels = "total_size_" + segment
        rows = x_co["_segment_label"] == segment
        x_co.loc[rows, "total_size_segment"] = x_co[labels][rows]

    # Remove choosers with invalid observed choice (appropriate total size value = 0)
    valid_observed_zone = x_co["total_size_segment"] > 0
    x_co = x_co[valid_observed_zone]
    x_ca = x_ca[x_ca.index.get_level_values(chooser_index_name).isin(
        x_co.index)]

    # Merge land use characteristics into CA data
    try:
        x_ca_1 = pd.merge(x_ca, landuse, on="zone_id", how="left")
    except KeyError:
        # Missing the zone_id variable?
        # Use the alternative id's instead, which assumes no sampling of alternatives
        x_ca_1 = pd.merge(x_ca,
                          landuse,
                          left_on=x_ca.index.get_level_values(1),
                          right_index=True,
                          how="left")
    # put the original (chooser, alternative) index back on the merged table
    x_ca_1.index = x_ca.index

    # Availability of choice zones
    # An alternative is unavailable where the flag column equals 1; with no
    # flag column, availability is the scalar 1.
    if "util_no_attractions" in x_ca_1:
        av = x_ca_1["util_no_attractions"].apply(
            lambda x: False if x == 1 else True).astype(np.int8)
    elif "@df['size_term']==0" in x_ca_1:
        av = x_ca_1["@df['size_term']==0"].apply(
            lambda x: False if x == 1 else True).astype(np.int8)
    else:
        av = 1

    d = DataFrames(co=x_co, ca=x_ca_1, av=av)

    m = Model(dataservice=d)
    # Three spec layouts: a single 'coefficient' column, a single column named
    # after the only segment, or the general segmented form.
    if len(spec.columns) == 4 and all(
            spec.columns == ['Label', 'Description', 'Expression', 'coefficient']):
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    elif len(spec.columns) == 4 \
            and all(spec.columns[:3] == ['Label', 'Description', 'Expression']) \
            and len(SEGMENT_IDS) == 1 \
            and spec.columns[3] == list(SEGMENT_IDS.values())[0]:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    else:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col=label_column_name,
            p_col=SEGMENT_IDS,
            ignore_x=("local_dist", ),
            segment_id=CHOOSER_SEGMENT_COLUMN_NAME,
        )

    # Size function: one "<segment>_<column>" parameter per nonzero size-spec
    # cell; in the segmented case each term is also multiplied by a
    # segment-membership expression.
    if CHOOSER_SEGMENT_COLUMN_NAME is None:
        assert len(size_spec) == 1
        m.quantity_ca = sum(
            P(f"{i}_{q}") * X(q) for i in size_spec.index
            for q in size_spec.columns if size_spec.loc[i, q] != 0)
    else:
        m.quantity_ca = sum(
            P(f"{i}_{q}") * X(q) *
            X(f"{CHOOSER_SEGMENT_COLUMN_NAME}=={str_repr(SEGMENT_IDS[i])}")
            for i in size_spec.index for q in size_spec.columns
            if size_spec.loc[i, q] != 0)

    apply_coefficients(coefficients, m)
    # size coefficients are applied with bounds of -6 and 6
    apply_coefficients(size_coef, m, minimum=-6, maximum=6)

    m.choice_co_code = "override_choice"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=Path(edb_directory),
                alt_values=alt_values,
                chooser_data=chooser_data,
                coefficients=coefficients,
                landuse=landuse,
                spec=spec,
                size_spec=size_spec,
                master_size_spec=master_size_spec,
                model_selector=model_selector,
                settings=settings,
            ),
        )

    return m
def schedule_choice_model(
    name,
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    spec_file="{name}_SPEC.csv",
    alt_values_file="{name}_alternatives_combined.csv",
    chooser_file="{name}_choosers_combined.csv",
    settings_file="{name}_model_settings.yaml",
    return_data=False,
):
    """
    Build a scheduling choice Model from files in an estimation data bundle.

    Reads the settings YAML plus the coefficients, spec, alternative values
    and chooser CSV files from `edb_directory`, sets the utility function
    from the spec, and attaches chooser and alternative data.  Alternatives
    are renumbered 1..N, and the choice column is ``override_choice + 1``.

    Parameters
    ----------
    name : str
        Substituted for ``{name}`` in `edb_directory` and every file name.
    edb_directory : str
        Directory template holding all of the files below.
    coefficients_file, spec_file, alt_values_file, chooser_file,
    settings_file : str
        File name templates, resolved relative to `edb_directory`.  If the
        coefficients file is not found, the ``COEFFICIENTS`` entry of the
        settings is tried instead.
    return_data : bool, default False
        When true, also return a Dict of the intermediate tables.

    Returns
    -------
    Model, or a ``(Model, Dict)`` tuple when `return_data` is set.

    Raises
    ------
    FileNotFoundError
        If a required file is missing.
    ValueError
        If the spec has neither a ``Label`` nor an ``Expression`` column.
    """
    model_selector = name.replace("_location", "")
    model_selector = model_selector.replace("_destination", "")
    model_selector = model_selector.replace("_subtour", "")
    model_selector = model_selector.replace("_tour", "")

    edb_directory = edb_directory.format(name=name)

    def _read_csv(filename, optional=False, **kwargs):
        # Resolve the {name} placeholder and read from the bundle directory;
        # a missing file yields None only when `optional` is set.
        filename = filename.format(name=name)
        try:
            return pd.read_csv(os.path.join(edb_directory, filename), **kwargs)
        except FileNotFoundError:
            if optional:
                return None
            raise

    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as yf:
        settings = yaml.load(
            yf,
            Loader=yaml.SafeLoader,
        )

    try:
        coefficients = _read_csv(
            coefficients_file,
            index_col="coefficient_name",
        )
    except FileNotFoundError:
        # possibly mis-named file is shown in settings
        coefficients_file = settings.get('COEFFICIENTS', coefficients_file)
        coefficients = _read_csv(
            coefficients_file,
            index_col="coefficient_name",
        )

    spec = _read_csv(spec_file, comment='#')
    alt_values = _read_csv(alt_values_file)
    chooser_data = _read_csv(chooser_file)

    # remove temp rows from spec, ASim uses them to calculate the other values written
    # to the EDB, but they are not actually part of the utility function themselves.
    # Rows with a missing Expression are dropped first (as location_choice_model
    # does): `.str.startswith` yields NaN for them, which cannot be inverted.
    spec = spec.loc[~spec.Expression.isna()]
    spec = spec.loc[~spec.Expression.str.startswith("_")].copy()

    include_settings = settings.get("include_settings")
    if include_settings:
        with open(os.path.join(edb_directory, include_settings), "r") as yf:
            more_settings = yaml.load(
                yf,
                Loader=yaml.SafeLoader,
            )
        settings.update(more_settings)

    CHOOSER_SEGMENT_COLUMN_NAME = settings.get("CHOOSER_SEGMENT_COLUMN_NAME")
    SEGMENT_IDS = settings.get("SEGMENT_IDS")
    if SEGMENT_IDS is None:
        # Fall back to SEGMENTS, mapping each segment to itself.
        SEGMENTS = settings.get("SEGMENTS")
        if SEGMENTS is not None:
            SEGMENT_IDS = {i: i for i in SEGMENTS}

    if 'Label' in spec.columns:
        label_column_name = 'Label'
    elif 'Expression' in spec.columns:
        label_column_name = 'Expression'
    else:
        raise ValueError("cannot find Label or Expression in spec file")

    m = Model()
    # Three spec layouts: a single 'coefficient' column (matched without
    # regard to case), a single column named after the only segment, or the
    # general segmented form.
    if len(spec.columns) == 4 and ([c.lower() for c in spec.columns] == [
            'label', 'description', 'expression', 'coefficient'
    ]):
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    elif len(spec.columns) == 4 \
            and all(spec.columns[:3] == ['Label', 'Description', 'Expression']) \
            and len(SEGMENT_IDS) == 1 \
            and spec.columns[3] == list(SEGMENT_IDS.values())[0]:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    else:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col=label_column_name,
            p_col=SEGMENT_IDS,
            ignore_x=("local_dist", ),
            segment_id=CHOOSER_SEGMENT_COLUMN_NAME,
        )

    apply_coefficients(coefficients, m, minimum=-25, maximum=25)

    # The first chooser column is used as the chooser index; the alternative
    # values are indexed by that same name plus their second column.
    chooser_index_name = chooser_data.columns[0]
    x_co = chooser_data.set_index(chooser_index_name)
    alt_values.fillna(0, inplace=True)
    x_ca = cv_to_ca(
        alt_values.set_index([chooser_index_name, alt_values.columns[1]]),
        required_labels=spec[label_column_name],
    )

    # Renumber the alternatives 1..N and shift the choice columns to match.
    # `level` is passed by keyword: it is keyword-only in pandas 2.x.
    alt_codes = np.arange(len(x_ca.index.levels[1])) + 1
    x_ca.index = x_ca.index.set_levels(alt_codes, level=1)
    x_co["override_choice_plus1"] = x_co["override_choice"] + 1
    x_co["model_choice_plus1"] = x_co["model_choice"] + 1

    # Coefficients constrained to a value below -900 mark alternatives as
    # unavailable; their data expressions become the availability condition.
    unavail_coefs = coefficients.query(
        "(constrain == 'T') & (value < -900)").index
    unavail_data = [i.data for i in m.utility_ca if i.param in unavail_coefs]
    if unavail_data:
        joint_unavail = "|".join(f"({i}>0)" for i in unavail_data)
        joint_avail = f"~({joint_unavail})"
    else:
        joint_avail = 1

    d = DataFrames(co=x_co, ca=x_ca, av=joint_avail)
    m.dataservice = d
    # The override choice is the choice variable; "model_choice_plus1" is
    # also left on the chooser table but is not used here.
    m.choice_co_code = "override_choice_plus1"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=Path(edb_directory),
                alt_values=alt_values,
                chooser_data=chooser_data,
                coefficients=coefficients,
                spec=spec,
                model_selector=model_selector,
                joint_avail=joint_avail,
            ),
        )

    return m
def mode_choice_model(
    name,
    edb_directory="output/estimation_data_bundle/{name}/",
    return_data=False,
    override_filenames=None,
):
    """
    Build a ModelGroup of nested mode choice Models, one per purpose.

    Purposes are the columns of the coefficient template.  When `name`
    contains ``"atwork"`` only the ``atwork`` purpose is modeled; otherwise
    ``atwork`` is left out and each purpose's Model is attached to the
    choosers selected by ``tour_type=='<purpose>'``.

    Parameters
    ----------
    name : str
        Forwarded to ``simple_simulate_data``.
    edb_directory : str
        Forwarded to ``simple_simulate_data``.
    return_data : bool, default False
        When true, also return a Dict of the intermediate objects.
    override_filenames : dict, optional
        Extra keyword arguments for ``simple_simulate_data``.

    Returns
    -------
    ModelGroup, or a ``(ModelGroup, Dict)`` tuple when `return_data` is set.
    """
    extra_kwargs = {} if override_filenames is None else override_filenames
    data = simple_simulate_data(
        name=name,
        edb_directory=edb_directory,
        **extra_kwargs,
    )
    coefficients = data.coefficients
    coef_template = data.coef_template
    spec = data.spec
    settings = data.settings

    chooser_data = clean_values(
        data.chooser_data,
        alt_names_to_codes=data.alt_names_to_codes,
        choice_code="override_choice_code",
    )

    tree = construct_nesting_tree(data.alt_names, settings["NESTS"])

    is_atwork = "atwork" in name
    purposes = list(coef_template.columns)
    if is_atwork:
        purposes = ['atwork']
    elif 'atwork' in purposes:
        purposes.remove('atwork')

    # Setup purpose specific models
    m = {}
    for purpose in purposes:
        m[purpose] = Model(graph=tree, title=purpose)

    for alt_code, alt_name in tree.elemental_names().items():
        # Read in base utility function for this alt_name
        base_utility = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=alt_name,
            ignore_x=("#", ),
        )
        for purpose in purposes:
            # Modify utility function based on template for purpose:
            # parameter names found in the template are swapped for the
            # purpose-specific name, others are kept as they are.
            template = coef_template[purpose]
            m[purpose].utility_co[alt_code] = sum(
                (P(template.get(term.param, term.param)) * term.data * term.scale)
                for term in base_utility
            )

    for purpose_model in m.values():
        explicit_value_parameters(purpose_model)
    apply_coefficients(coefficients, m)

    # Availability is constructed from the first purpose's model only.
    avail = construct_availability(
        m[purposes[0]], chooser_data, data.alt_codes_to_names
    )

    d = DataFrames(
        co=chooser_data,
        av=avail,
        alt_codes=data.alt_codes,
        alt_names=data.alt_names,
    )

    for purpose, purpose_model in m.items():
        if is_atwork:
            purpose_model.dataservice = d
        else:
            purpose_model.dataservice = d.selector_co(f"tour_type=='{purpose}'")
        purpose_model.choice_co_code = "override_choice_code"

    from larch.model.model_group import ModelGroup

    mg = ModelGroup(m.values())

    if not return_data:
        return mg

    extras = Dict(
        edb_directory=Path(edb_directory),
        chooser_data=chooser_data,
        avail=avail,
        coefficients=coefficients,
        coef_template=coef_template,
        spec=spec,
        settings=settings,
    )
    return (mg, extras)
def simple_simulate_model(
    name,
    edb_directory="output/estimation_data_bundle/{name}/",
    return_data=False,
    choices=None,
    construct_avail=False,
    values_index_col="household_id",
):
    """
    Build a single Model from the data returned by ``simple_simulate_data``.

    The Model uses a nesting tree built from the ``NESTS`` setting when the
    ``LOGIT_TYPE`` setting is ``'NL'``, and plain alternatives otherwise.

    Parameters
    ----------
    name : str
        Forwarded to ``simple_simulate_data``.
    edb_directory : str
        Forwarded to ``simple_simulate_data``.
    return_data : bool, default False
        When true, also return a Dict of the intermediate objects.
    choices : optional
        Used as the ``alt_names_to_codes`` argument of ``clean_values``
        when truthy; otherwise the mapping from the loaded data is used.
    construct_avail : bool, default False
        When true, availability comes from ``construct_availability``;
        otherwise every alternative is flagged available.
    values_index_col : str, default "household_id"
        Forwarded to ``simple_simulate_data``.

    Returns
    -------
    Model, or a ``(Model, Dict)`` tuple when `return_data` is set.
    """
    data = simple_simulate_data(
        name=name,
        edb_directory=edb_directory,
        values_index_col=values_index_col,
    )
    coefficients = data.coefficients
    spec = data.spec
    settings = data.settings
    alt_names = data.alt_names
    alt_codes = data.alt_codes

    from .general import clean_values

    name_to_code = choices or data.alt_names_to_codes
    chooser_data = clean_values(
        data.chooser_data,
        alt_names_to_codes=name_to_code,
        choice_code="override_choice_code",
    )

    is_nested = settings.get('LOGIT_TYPE') == 'NL'
    if is_nested:
        m = Model(graph=construct_nesting_tree(data.alt_names, settings["NESTS"]))
    else:
        m = Model(alts=data.alt_codes_to_names)

    code_by_name = dict(zip(alt_names, alt_codes))
    m.utility_co = dict_of_linear_utility_from_spec(spec, "Label", code_by_name)
    apply_coefficients(coefficients, m)

    avail = True
    if construct_avail:
        avail = construct_availability(m, chooser_data, data.alt_codes_to_names)

    m.dataservice = DataFrames(
        co=chooser_data,
        av=avail,
        alt_codes=alt_codes,
        alt_names=alt_names,
    )
    m.choice_co_code = "override_choice_code"

    if not return_data:
        return m

    extras = Dict(
        edb_directory=data.edb_directory,
        chooser_data=chooser_data,
        coefficients=coefficients,
        spec=spec,
        alt_names=alt_names,
        alt_codes=alt_codes,
        settings=settings,
    )
    return (m, extras)
def auto_ownership_model(
    name="auto_ownership",
    edb_directory="output/estimation_data_bundle/{name}/",
    return_data=False,
):
    """
    Build the auto ownership Model from ``simple_simulate_data`` output.

    Alternatives are the spec columns after the first three, coded
    0..N-1 in column order.  Choosers whose ``override_choice`` is negative
    are dropped.

    Parameters
    ----------
    name : str, default "auto_ownership"
        Forwarded to ``simple_simulate_data``.
    edb_directory : str
        Forwarded to ``simple_simulate_data``.
    return_data : bool, default False
        When true, also return a Dict of the intermediate objects.

    Returns
    -------
    Model, or a ``(Model, Dict)`` tuple when `return_data` is set.
    """
    data = simple_simulate_data(
        name=name,
        edb_directory=edb_directory,
        values_index_col="household_id",
    )
    coefficients = data.coefficients
    spec = data.spec
    settings = data.settings  # read as in the original; not used below

    altnames = list(spec.columns[3:])
    altcodes = range(len(altnames))

    choosers = remove_apostrophes(data.chooser_data)
    choosers.fillna(0, inplace=True)

    # Remove choosers with invalid observed choice
    has_valid_choice = choosers["override_choice"] >= 0
    choosers = choosers[has_valid_choice]

    m = Model()
    # One of the alternatives is coded as 0, so
    # we need to explicitly initialize the MNL nesting graph
    # and set to root_id to a value other than zero.
    m.initialize_graph(alternative_codes=altcodes, root_id=99)

    code_by_name = dict(zip(altnames, altcodes))
    m.utility_co = dict_of_linear_utility_from_spec(spec, "Label", code_by_name)
    apply_coefficients(coefficients, m)

    m.dataservice = DataFrames(
        co=choosers,
        av=True,
        alt_codes=altcodes,
        alt_names=altnames,
    )
    m.choice_co_code = "override_choice"

    if not return_data:
        return m

    extras = Dict(
        edb_directory=data.edb_directory,
        chooser_data=choosers,
        coefficients=coefficients,
        spec=spec,
        altnames=altnames,
        altcodes=altcodes,
    )
    return (m, extras)