def test_301():
    """Estimate a 17-term linear-in-parameters model and check the fit.

    The model is built on the data returned by ``example(300, 'd')``, with
    one ``PX`` term per variable name listed below, and the estimation
    results are compared against stored reference values.
    """
    variable_names = (
        # time period indicators
        "timeperiod==2",
        "timeperiod==3",
        "timeperiod==4",
        "timeperiod==5",
        "timeperiod==6",
        "timeperiod==7",
        "timeperiod==8",
        "timeperiod==9",
        # carrier indicators
        "carrier==2",
        "carrier==3",
        "carrier==4",
        "carrier==5",
        # remaining terms
        "equipment==2",
        "fare_hy",
        "fare_ly",
        "elapsed_time",
        "nb_cnxs",
    )

    data = example(300, 'd')
    model = Model(dataservice=data)
    model.utility_ca = sum(PX(term) for term in variable_names)
    model.choice_ca_var = 'choice'
    model.load_data()

    estimate = model.maximize_loglike()

    assert estimate.loglike == approx(-777770.0688722526)
    assert estimate.x['carrier==2'] == approx(0.11720047917232307)
    assert estimate.logloss == approx(3.306873650593341)
def test_utility_function_output():
    """Check the HTML rendering returned by ``Model.utility_functions``.

    A small model with shared (``utility_ca``), alternative-specific
    (``utility_co``) and quantity (``quantity_ca``) terms is rendered twice:
    once with ``resolve_parameters=False`` and once with
    ``resolve_parameters=True``.  Each rendering is compared verbatim with a
    stored reference string.
    """
    # Reference output for resolve_parameters=False.
    expected_unresolved = (
        '<div><table class="floatinghead" style="margin-top:1px;"><thead>'
        '<tr><th>alt</th><th style="text-align:left;">formula</th></tr></thead>'
        '<tbody><tr><td>1</td><td style="text-align:left;"><div></div> + '
        '<div class="tooltipped">P.Aaa<span class="tooltiptext">12</span></div> * '
        '<div class="tooltipped">X.Aaa<span class="tooltiptext">This is Data</span>'
        '</div><br> + <div class="tooltipped">P.Bbb<span class="tooltiptext">20</span>'
        '</div> * <div class="tooltipped">X.Bbb<span class="tooltiptext">This is Data'
        '</span></div><br> + <div class="tooltipped">P.Ccc<span class="tooltiptext">2'
        '</span></div><br> + <div class="tooltipped">P.Dx1<span class="tooltiptext">0'
        '</span></div><br> + <div class="tooltipped">P.Dy1<span class="tooltiptext">0.001'
        '</span></div> * <div class="tooltipped">X.Yyy<span class="tooltiptext">'
        'This is Data</span></div><br> + log(<br>\xa0\xa0 + <span></span>exp('
        '<div class="tooltipped">P.Qaa<span class="tooltiptext">exp(0) = 0</span></div>) '
        '* <div class="tooltipped">X.Aaa<span class="tooltiptext">This is Data</span></div>'
        '<br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">P.Qbb<span class="tooltiptext">'
        'exp(0) = 0</span></div>) * <div class="tooltipped">X.Bbb<span class="tooltiptext">'
        'This is Data</span></div><br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">'
        'P.Qcc<span class="tooltiptext">exp(0) = 0</span></div>)<br>)</td></tr><tr><td>2</td>'
        '<td style="text-align:left;"><div></div> + <div class="tooltipped">P.Aaa'
        '<span class="tooltiptext">12</span></div> * <div class="tooltipped">X.Aaa'
        '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">'
        'P.Bbb<span class="tooltiptext">20</span></div> * <div class="tooltipped">X.Bbb'
        '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">'
        'P.Ccc<span class="tooltiptext">2</span></div><br> + <div class="tooltipped">P.Dx2'
        '<span class="tooltiptext">0.33</span></div><br> + <div class="tooltipped">P.Dy2'
        '<span class="tooltiptext">-0.002</span></div> * <div class="tooltipped">X.Yyy'
        '<span class="tooltiptext">This is Data</span></div><br> + log(<br>\xa0\xa0 + '
        '<span></span>exp(<div class="tooltipped">P.Qaa<span class="tooltiptext">exp(0) = 0'
        '</span></div>) * <div class="tooltipped">X.Aaa<span class="tooltiptext">'
        'This is Data</span></div><br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">'
        'P.Qbb<span class="tooltiptext">exp(0) = 0</span></div>) * <div class="tooltipped">'
        'X.Bbb<span class="tooltiptext">This is Data</span></div><br>\xa0\xa0 + <span>'
        '</span>exp(<div class="tooltipped">P.Qcc<span class="tooltiptext">exp(0) = 0</span>'
        '</div>)<br>)</td></tr></tbody></table></div>'
    )

    # Reference output for resolve_parameters=True.
    expected_resolved = (
        '<div><table class="floatinghead" style="margin-top:1px;"><thead><tr><th>alt</th>'
        '<th style="text-align:left;">formula</th></tr></thead><tbody><tr><td>1</td>'
        '<td style="text-align:left;"><div></div> + <div class="tooltipped">12'
        '<span class="tooltiptext">P.Aaa</span></div> * <div class="tooltipped">'
        'X.Aaa<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">'
        '20<span class="tooltiptext">P.Bbb</span></div> * <div class="tooltipped">X.Bbb'
        '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">'
        '2<span class="tooltiptext">P.Ccc</span></div><br> + <div class="tooltipped">0'
        '<span class="tooltiptext">P.Dx1</span></div><br> + <div class="tooltipped">0.001'
        '<span class="tooltiptext">P.Dy1</span></div> * <div class="tooltipped">X.Yyy'
        '<span class="tooltiptext">This is Data</span></div><br> + log(<br>\xa0\xa0 + <span>'
        '</span>exp(<div class="tooltipped">0<span class="tooltiptext">exp(P.Qaa)</span></div>) '
        '* <div class="tooltipped">X.Aaa<span class="tooltiptext">This is Data</span></div>'
        '<br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">0<span class="tooltiptext">'
        'exp(P.Qbb)</span></div>) * <div class="tooltipped">X.Bbb<span class="tooltiptext">'
        'This is Data</span></div><br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">0'
        '<span class="tooltiptext">exp(P.Qcc)</span></div>)<br>)</td></tr><tr><td>2</td>'
        '<td style="text-align:left;"><div></div> + <div class="tooltipped">12'
        '<span class="tooltiptext">P.Aaa</span></div> * <div class="tooltipped">X.Aaa'
        '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">'
        '20<span class="tooltiptext">P.Bbb</span></div> * <div class="tooltipped">X.Bbb'
        '<span class="tooltiptext">This is Data</span></div><br> + <div class="tooltipped">2'
        '<span class="tooltiptext">P.Ccc</span></div><br> + <div class="tooltipped">0.33'
        '<span class="tooltiptext">P.Dx2</span></div><br> + <div class="tooltipped">-0.002'
        '<span class="tooltiptext">P.Dy2</span></div> * <div class="tooltipped">X.Yyy'
        '<span class="tooltiptext">This is Data</span></div><br> + log(<br>\xa0\xa0 + <span>'
        '</span>exp(<div class="tooltipped">0<span class="tooltiptext">exp(P.Qaa)</span></div>) '
        '* <div class="tooltipped">X.Aaa<span class="tooltiptext">This is Data</span></div>'
        '<br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">0<span class="tooltiptext">'
        'exp(P.Qbb)</span></div>) * <div class="tooltipped">X.Bbb<span class="tooltiptext">'
        'This is Data</span></div><br>\xa0\xa0 + <span></span>exp(<div class="tooltipped">'
        '0<span class="tooltiptext">exp(P.Qcc)</span></div>)<br>)</td></tr></tbody></table></div>'
    )

    model = Model()
    model.utility_ca = P.Aaa * X.Aaa + P.Bbb * X.Bbb + P.Ccc
    model.utility_co[1] = P.Dx1 + P.Dy1 * X.Yyy
    model.utility_co[2] = P.Dx2 + P.Dy2 * X.Yyy
    model.quantity_ca = P.Qaa * X.Aaa + P.Qbb * X.Bbb + P.Qcc
    model.set_values(
        Aaa=12,
        Bbb=20,
        Ccc=2,
        Dx1=0,
        Dy1=0.001,
        Dx2=0.33,
        Dy2=-0.002,
    )

    unresolved = model.utility_functions(resolve_parameters=False)
    assert unresolved.tostring() == expected_unresolved

    resolved = model.utility_functions(resolve_parameters=True)
    assert resolved.tostring() == expected_resolved
def location_choice_model(
    name="workplace_location",
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    spec_file="{name}_SPEC.csv",
    size_spec_file="{name}_size_terms.csv",
    alt_values_file="{name}_alternatives_combined.csv",
    chooser_file="{name}_choosers_combined.csv",
    settings_file="{name}_model_settings.yaml",
    landuse_file="{name}_landuse.csv",
    return_data=False,
):
    """Build a location choice model from an estimation data bundle (EDB).

    Parameters
    ----------
    name : str
        Name of the model.  It is substituted for ``{name}`` in the directory
        and file name templates, and (with the ``_location``,
        ``_destination``, ``_subtour`` and ``_tour`` parts removed) it gives
        the default size term selector.
    edb_directory : str
        Directory template for the EDB; all files are read relative to it.
    coefficients_file, spec_file, size_spec_file, alt_values_file, \
chooser_file, landuse_file : str
        CSV file name templates.
    settings_file : str
        YAML settings file name template.  If the settings name an
        ``include_settings`` file that exists in the EDB directory, it is
        loaded as well and merged over the primary settings.
    return_data : bool, default False
        If True, also return the data that was loaded to build the model.

    Returns
    -------
    Model
        When `return_data` is False.
    tuple of (Model, Dict)
        When `return_data` is True; the ``Dict`` holds ``edb_directory``,
        ``alt_values``, ``chooser_data``, ``coefficients``, ``landuse``,
        ``spec``, ``size_spec``, ``master_size_spec``, ``model_selector``
        and ``settings``.

    Raises
    ------
    ValueError
        If the spec has neither a ``Label`` nor an ``Expression`` column.
    """
    model_selector = name.replace("_location", "")
    model_selector = model_selector.replace("_destination", "")
    model_selector = model_selector.replace("_subtour", "")
    model_selector = model_selector.replace("_tour", "")
    if model_selector == 'joint':
        model_selector = 'non_mandatory'
    edb_directory = edb_directory.format(name=name)

    def _read_csv(filename, **kwargs):
        # Resolve the file name template and read from the EDB directory.
        filename = filename.format(name=name)
        return pd.read_csv(os.path.join(edb_directory, filename), **kwargs)

    coefficients = _read_csv(
        coefficients_file,
        index_col="coefficient_name",
    )
    spec = _read_csv(spec_file, comment="#")
    alt_values = _read_csv(alt_values_file)
    chooser_data = _read_csv(chooser_file)
    landuse = _read_csv(landuse_file, index_col="zone_id")
    master_size_spec = _read_csv(size_spec_file)

    # remove temp rows from spec, ASim uses them to calculate the other values
    # written to the EDB, but they are not actually part of the utility
    # function themselves.
    spec = spec.loc[~spec.Expression.isna()]
    spec = spec.loc[~spec.Expression.str.startswith("_")].copy()

    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as yf:
        settings = yaml.load(
            yf,
            Loader=yaml.SafeLoader,
        )

    # Optional secondary settings file; silently skipped when it is absent.
    include_settings = settings.get("include_settings")
    if include_settings:
        include_settings = os.path.join(edb_directory, include_settings)
    if include_settings and os.path.exists(include_settings):
        with open(include_settings, "r") as yf:
            more_settings = yaml.load(
                yf,
                Loader=yaml.SafeLoader,
            )
        settings.update(more_settings)

    CHOOSER_SEGMENT_COLUMN_NAME = settings.get("CHOOSER_SEGMENT_COLUMN_NAME")
    SEGMENT_IDS = settings.get("SEGMENT_IDS")
    if SEGMENT_IDS is None:
        # Fall back to a plain list of segments, each mapped to itself.
        SEGMENTS = settings.get("SEGMENTS")
        if SEGMENTS is not None:
            SEGMENT_IDS = {i: i for i in SEGMENTS}

    SIZE_TERM_SELECTOR = settings.get('SIZE_TERM_SELECTOR', model_selector)

    # filter size spec for this location choice only
    size_spec = (
        master_size_spec
        .query(f"model_selector == '{SIZE_TERM_SELECTOR}'")
        .drop(columns="model_selector")
        .set_index("segment")
    )
    # keep only the land use fields with a positive size for some segment
    size_spec = size_spec.loc[:, size_spec.max() > 0]

    size_coef = size_coefficients_from_spec(size_spec)

    indexes_to_drop = [
        "util_size_variable",  # pre-computed size (will be re-estimated)
        "util_size_variable_atwork",  # pre-computed size (will be re-estimated)
        "util_utility_adjustment",  # shadow pricing (ignored in estimation)
        "@df['size_term'].apply(np.log1p)",  # pre-computed size (will be re-estimated)
    ]
    if 'Label' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Label.to_numpy()
        ]
        label_column_name = 'Label'
    elif 'Expression' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Expression.to_numpy()
        ]
        label_column_name = 'Expression'
    else:
        raise ValueError("cannot find Label or Expression in spec file")

    # When the spec has no labels, generate one per expression so that the
    # spec rows and the alternative values can be keyed consistently.
    expression_labels = None
    if label_column_name == 'Expression':
        expression_labels = {
            expr: f"variable_label{n:04d}"
            for n, expr in enumerate(spec.Expression.to_numpy())
        }

    # Remove shadow pricing and pre-existing size expression for re-estimation
    spec = (
        spec
        .set_index(label_column_name)
        .drop(index=indexes_to_drop)
        .reset_index()
    )

    if label_column_name == 'Expression':
        spec.insert(0, "Label", spec['Expression'].map(expression_labels))
        alt_values['variable'] = alt_values['variable'].map(expression_labels)
        label_column_name = "Label"

    if name == 'trip_destination':
        # Trip destination is segmented by primary purpose; the segment names
        # are taken from the spec columns after the first three.
        CHOOSER_SEGMENT_COLUMN_NAME = 'primary_purpose'
        primary_purposes = spec.columns[3:]
        SEGMENT_IDS = {pp: pp for pp in primary_purposes}

    chooser_index_name = chooser_data.columns[0]
    x_co = chooser_data.set_index(chooser_index_name)
    x_ca = cv_to_ca(
        alt_values.set_index([chooser_index_name, alt_values.columns[1]])
    )

    if CHOOSER_SEGMENT_COLUMN_NAME is not None:
        # label segments with names
        SEGMENT_IDS_REVERSE = {v: k for k, v in SEGMENT_IDS.items()}
        x_co["_segment_label"] = x_co[CHOOSER_SEGMENT_COLUMN_NAME].apply(
            lambda x: SEGMENT_IDS_REVERSE[x]
        )
    else:
        x_co["_segment_label"] = size_spec.index[0]

    # compute total size values by segment
    for segment in size_spec.index:
        total_size_segment = pd.Series(0, index=landuse.index)
        x_co["total_size_" + segment] = 0
        for land_use_field in size_spec.loc[segment].index:
            total_size_segment += (
                landuse[land_use_field] * size_spec.loc[segment, land_use_field]
            )
        # size of the observed (override) choice zone for every chooser
        x_co["total_size_" + segment] = total_size_segment.loc[
            x_co["override_choice"]
        ].to_numpy()

    # for each chooser, collate the appropriate total size value
    # The column starts as float so the row-wise assignment of (generally
    # non-integer) size totals below does not require a dtype change, which
    # pandas deprecates for setitem-like operations.
    x_co["total_size_segment"] = 0.0
    for segment in size_spec.index:
        labels = "total_size_" + segment
        rows = x_co["_segment_label"] == segment
        x_co.loc[rows, "total_size_segment"] = x_co[labels][rows]

    # Remove choosers with invalid observed choice
    # (appropriate total size value = 0)
    valid_observed_zone = x_co["total_size_segment"] > 0
    x_co = x_co[valid_observed_zone]
    x_ca = x_ca[
        x_ca.index.get_level_values(chooser_index_name).isin(x_co.index)
    ]

    # Merge land use characteristics into CA data
    try:
        x_ca_1 = pd.merge(x_ca, landuse, on="zone_id", how="left")
    except KeyError:
        # Missing the zone_id variable?
        # Use the alternative id's instead, which assumes no sampling of
        # alternatives
        x_ca_1 = pd.merge(
            x_ca,
            landuse,
            left_on=x_ca.index.get_level_values(1),
            right_index=True,
            how="left",
        )
    x_ca_1.index = x_ca.index

    # Availability of choice zones: an alternative is unavailable exactly
    # when its "no attractions" / "zero size" indicator equals 1.
    if "util_no_attractions" in x_ca_1:
        av = (x_ca_1["util_no_attractions"] != 1).astype(np.int8)
    elif "@df['size_term']==0" in x_ca_1:
        av = (x_ca_1["@df['size_term']==0"] != 1).astype(np.int8)
    else:
        av = 1

    d = DataFrames(co=x_co, ca=x_ca_1, av=av)

    m = Model(dataservice=d)
    if len(spec.columns) == 4 and all(
        spec.columns == ['Label', 'Description', 'Expression', 'coefficient']
    ):
        # unsegmented spec with a single generic coefficient column
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    elif len(spec.columns) == 4 \
            and all(spec.columns[:3] == ['Label', 'Description', 'Expression']) \
            and len(SEGMENT_IDS) == 1 \
            and spec.columns[3] == list(SEGMENT_IDS.values())[0]:
        # a single segment whose name is the coefficient column
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    else:
        # one coefficient column per segment
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col=label_column_name,
            p_col=SEGMENT_IDS,
            ignore_x=("local_dist", ),
            segment_id=CHOOSER_SEGMENT_COLUMN_NAME,
        )

    if CHOOSER_SEGMENT_COLUMN_NAME is None:
        assert len(size_spec) == 1
        m.quantity_ca = sum(
            P(f"{i}_{q}") * X(q)
            for i in size_spec.index
            for q in size_spec.columns
            if size_spec.loc[i, q] != 0
        )
    else:
        # each size term only applies to choosers in its own segment
        m.quantity_ca = sum(
            P(f"{i}_{q}")
            * X(q)
            * X(f"{CHOOSER_SEGMENT_COLUMN_NAME}=={str_repr(SEGMENT_IDS[i])}")
            for i in size_spec.index
            for q in size_spec.columns
            if size_spec.loc[i, q] != 0
        )

    apply_coefficients(coefficients, m)
    apply_coefficients(size_coef, m, minimum=-6, maximum=6)

    m.choice_co_code = "override_choice"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=Path(edb_directory),
                alt_values=alt_values,
                chooser_data=chooser_data,
                coefficients=coefficients,
                landuse=landuse,
                spec=spec,
                size_spec=size_spec,
                master_size_spec=master_size_spec,
                model_selector=model_selector,
                settings=settings,
            ),
        )

    return m
def schedule_choice_model(
    name,
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    spec_file="{name}_SPEC.csv",
    alt_values_file="{name}_alternatives_combined.csv",
    chooser_file="{name}_choosers_combined.csv",
    settings_file="{name}_model_settings.yaml",
    return_data=False,
):
    """Build a scheduling choice model from an estimation data bundle (EDB).

    Parameters
    ----------
    name : str
        Name of the model; substituted for ``{name}`` in the directory and
        file name templates.
    edb_directory : str
        Directory template for the EDB; all files are read relative to it.
    coefficients_file, spec_file, alt_values_file, chooser_file : str
        CSV file name templates.  If the coefficients file is not found, the
        name given by the ``COEFFICIENTS`` setting is tried instead.
    settings_file : str
        YAML settings file name template.  If the settings name an
        ``include_settings`` file, it is loaded as well and merged over the
        primary settings.
    return_data : bool, default False
        If True, also return the data that was loaded to build the model.

    Returns
    -------
    Model
        When `return_data` is False.
    tuple of (Model, Dict)
        When `return_data` is True; the ``Dict`` holds ``edb_directory``,
        ``alt_values``, ``chooser_data``, ``coefficients``, ``spec``,
        ``model_selector`` and ``joint_avail``.

    Raises
    ------
    FileNotFoundError
        If a required file is missing from the EDB directory.
    ValueError
        If the spec has neither a ``Label`` nor an ``Expression`` column.
    """
    model_selector = name.replace("_location", "")
    model_selector = model_selector.replace("_destination", "")
    model_selector = model_selector.replace("_subtour", "")
    model_selector = model_selector.replace("_tour", "")
    edb_directory = edb_directory.format(name=name)

    def _read_csv(filename, optional=False, **kwargs):
        # Resolve the file name template and read from the EDB directory;
        # a missing file yields None only when flagged as optional.
        filename = filename.format(name=name)
        try:
            return pd.read_csv(os.path.join(edb_directory, filename), **kwargs)
        except FileNotFoundError:
            if optional:
                return None
            else:
                raise

    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as yf:
        settings = yaml.load(
            yf,
            Loader=yaml.SafeLoader,
        )

    try:
        coefficients = _read_csv(
            coefficients_file,
            index_col="coefficient_name",
        )
    except FileNotFoundError:
        # possibly mis-named file is shown in settings
        coefficients_file = settings.get('COEFFICIENTS', coefficients_file)
        coefficients = _read_csv(
            coefficients_file,
            index_col="coefficient_name",
        )

    spec = _read_csv(spec_file, comment='#')
    alt_values = _read_csv(alt_values_file)
    chooser_data = _read_csv(chooser_file)

    # remove temp rows from spec, ASim uses them to calculate the other values
    # written to the EDB, but they are not actually part of the utility
    # function themselves.
    # Rows without an expression are dropped first: `str.startswith` yields
    # NaN for them, and NaN cannot be negated with `~`.
    spec = spec.loc[~spec.Expression.isna()]
    spec = spec.loc[~spec.Expression.str.startswith("_")].copy()

    include_settings = settings.get("include_settings")
    if include_settings:
        with open(os.path.join(edb_directory, include_settings), "r") as yf:
            more_settings = yaml.load(
                yf,
                Loader=yaml.SafeLoader,
            )
        settings.update(more_settings)

    CHOOSER_SEGMENT_COLUMN_NAME = settings.get("CHOOSER_SEGMENT_COLUMN_NAME")
    SEGMENT_IDS = settings.get("SEGMENT_IDS")
    if SEGMENT_IDS is None:
        # Fall back to a plain list of segments, each mapped to itself.
        SEGMENTS = settings.get("SEGMENTS")
        if SEGMENTS is not None:
            SEGMENT_IDS = {i: i for i in SEGMENTS}

    if 'Label' in spec.columns:
        label_column_name = 'Label'
    elif 'Expression' in spec.columns:
        label_column_name = 'Expression'
    else:
        raise ValueError("cannot find Label or Expression in spec file")

    m = Model()
    if len(spec.columns) == 4 and (
        [c.lower() for c in spec.columns]
        == ['label', 'description', 'expression', 'coefficient']
    ):
        # unsegmented spec with a single generic coefficient column
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    elif len(spec.columns) == 4 \
            and all(spec.columns[:3] == ['Label', 'Description', 'Expression']) \
            and len(SEGMENT_IDS) == 1 \
            and spec.columns[3] == list(SEGMENT_IDS.values())[0]:
        # a single segment whose name is the coefficient column
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    else:
        # one coefficient column per segment
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col=label_column_name,
            p_col=SEGMENT_IDS,
            ignore_x=("local_dist", ),
            segment_id=CHOOSER_SEGMENT_COLUMN_NAME,
        )

    apply_coefficients(coefficients, m, minimum=-25, maximum=25)

    chooser_index_name = chooser_data.columns[0]
    x_co = chooser_data.set_index(chooser_index_name)
    alt_values.fillna(0, inplace=True)
    x_ca = cv_to_ca(
        alt_values.set_index([chooser_index_name, alt_values.columns[1]]),
        required_labels=spec[label_column_name],
    )

    # Renumber the alternatives 1..N.  `level` must be passed by keyword:
    # it is keyword-only in pandas >= 2.0.
    alt_codes = np.arange(len(x_ca.index.levels[1])) + 1
    x_ca.index = x_ca.index.set_levels(alt_codes, level=1)
    # Shift the recorded choices by one to line up with the new codes.
    x_co["override_choice_plus1"] = x_co["override_choice"] + 1
    x_co["model_choice_plus1"] = x_co["model_choice"] + 1

    # Coefficients constrained to a very large negative value mark an
    # alternative as unavailable whenever the attached data term is positive.
    unavail_coefs = coefficients.query(
        "(constrain == 'T') & (value < -900)"
    ).index
    unavail_data = [i.data for i in m.utility_ca if i.param in unavail_coefs]
    if unavail_data:
        joint_unavail = "|".join(f"({i}>0)" for i in unavail_data)
        joint_avail = f"~({joint_unavail})"
    else:
        joint_avail = 1

    d = DataFrames(co=x_co, ca=x_ca, av=joint_avail)
    m.dataservice = d
    # The observed (override) choice is the one estimated against;
    # "model_choice_plus1" is also present in the CO data.
    m.choice_co_code = "override_choice_plus1"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=Path(edb_directory),
                alt_values=alt_values,
                chooser_data=chooser_data,
                coefficients=coefficients,
                spec=spec,
                model_selector=model_selector,
                joint_avail=joint_avail,
            ),
        )

    return m