def test_linear_function_iadd():
    """Check ``+=`` on a linear function, both free-standing and attached to a model."""
    # Free-standing linear function: the in-place add appends the new term.
    standalone = P.tottime * X.tottime + P.totcost * X.totcost
    standalone += X("totcost*tottime") * P("fake")
    expected_standalone = (
        P.tottime * X.tottime
        + P.totcost * X.totcost
        + X("totcost*tottime") * P("fake")
    )
    assert standalone == expected_standalone

    # Linear function owned by a model: the same in-place add goes through
    # the model's ``utility_ca`` attribute.
    model = larch.Model(
        utility_ca=P.tottime * X.tottime + P.totcost * X.totcost
    )
    model.utility_ca += X("totcost*tottime") * P("fake")
    expected_attached = (
        P.tottime * X.tottime
        + P.totcost * X.totcost
        + X("totcost*tottime") * P("fake")
    )
    assert model.utility_ca == expected_attached
def cdap_base_utility_by_person(model, n_persons, spec, alts=None, value_tokens=()):
    """
    Build the base utility by person for each pattern.

    Parameters
    ----------
    model : larch.Model
        The model whose `utility_co` entries are extended in place.
    n_persons : int
        Number of persons in the household pattern being modelled.
    spec : pandas.DataFrame
        The base utility by person spec provided by the ActivitySim
        framework.  It must have an ``Expression`` column, plus one column
        per activity letter used in the alternative names (``M``, ``N``
        and ``H`` in the single-person case).
    alts : dict, optional
        The keys are the names of the patterns, and the values are the
        alternative code numbers, as created by `generate_alternatives`.
        If not given, the alts are automatically regenerated using that
        function.  Only used when `n_persons` is more than 1.
    value_tokens : list-like of str, optional
        A list of tokens to edit within the expressions, generally the
        column names of the provided values from the estimation data
        bundle.  Only used when `n_persons` is more than 1.
    """
    if n_persons == 1:
        # One-person households: activity column -> fixed alternative code.
        single_person_alts = (("M", 1), ("N", 2), ("H", 3))
        for row in spec.index:
            for activity, altcode in single_person_alts:
                coef_name = spec.loc[row, activity]
                if not pd.isna(coef_name):
                    model.utility_co[altcode] += (
                        X(spec.Expression[row]) * P(coef_name)
                    )
        return

    if alts is None:
        alts = generate_alternatives(n_persons)
    for pnum in range(1, n_persons + 1):
        # Position of this person's activity letter within a pattern name.
        z = pnum - 1
        person_prefix = f"p{pnum}"
        for row in spec.index:
            for aname, anum in alts.items():
                coef_name = spec.loc[row, aname[z]]
                if pd.isna(coef_name):
                    continue
                x = apply_replacements(
                    spec.Expression[row], person_prefix, value_tokens
                )
                model.utility_co[anum] += X(x) * P(coef_name)
def test_parameter_c():
    """A ParameterRef_C compares, hashes and prints consistently with its name."""
    ref = ParameterRef_C("hsh")

    # Comparison against a plain string works from either side.
    assert "hsh" == ref
    assert ref == "hsh"
    assert not keyword.iskeyword(ref)
    assert hash(ref) == hash("hsh")
    assert repr(ref) == "P.hsh"

    # Every way of spelling the same parameter through ``P`` is equal to it.
    for same_param in (P.hsh, P("hsh"), P['hsh']):
        assert ref == same_param
def test_overspec():
    """Check the possible-overspecification report at a fixed parameter vector."""
    model = larch.Model.Example(1)
    model.utility_ca = model.utility_ca + P.failpar * X('1')
    model.utility_co[1] = P.ASC_DA
    model.lock_value('tottime', -0.1)
    shared_income = 'hhinc#23'
    model.utility_co[2] = P.ASC_SR2 + P(shared_income) * X.hhinc
    model.utility_co[3] = P.ASC_SR3P + P(shared_income) * X.hhinc
    model.remove_unused_parameters()
    model.load_data()

    # The model is not estimated here (constraint tests are unstable across
    # platforms); previously obtained parameter values are set directly.
    fixed_values = {
        'ASC_BIKE': -0.8550063261138748,
        'ASC_DA': 0.9780172816142935,
        'ASC_SR2': -1.0303087193826583,
        'ASC_SR3P': -2.394702207497934,
        'ASC_TRAN': 1.2134607482035888,
        'ASC_WALK': 2.0885392231767055,
        'failpar': -1.2138930556454395e-14,
        'hhinc#23': -0.001647452425832848,
        'hhinc#4': -0.005545798283823439,
        'hhinc#5': -0.012530050562373019,
        'hhinc#6': -0.010792561322141715,
        'totcost': -0.005093524162084949,
        'tottime': -0.1,
    }
    model.set_values(fixed_values)
    model.calculate_parameter_covariance()

    report = model.possible_overspecification
    assert report.data.shape == (7, 2)
    flagged = [
        'ASC_BIKE',
        'ASC_DA',
        'ASC_SR2',
        'ASC_SR3P',
        'ASC_TRAN',
        'ASC_WALK',
        'failpar',
    ]
    assert all(report.data.index.sort_values() == flagged)
def test_joint_parameter_summary():
    """Compare the joint parameter summary of a full model and a constants-only model."""
    from larch.util.summary import joint_parameter_summary

    # Full example model, started from known values before estimation.
    full = larch.example(1, legacy=True)
    full.load_data()
    full.loglike_null()
    starting_values = {
        'ASC_BIKE': -2.3763275319243244,
        'ASC_SR2': -2.1780143286612037,
        'ASC_SR3P': -3.725078388760564,
        'ASC_TRAN': -0.6708609582690096,
        'ASC_WALK': -0.20677521181801753,
        'hhinc#2': -0.0021699381002406883,
        'hhinc#3': 0.0003577067151217295,
        'hhinc#4': -0.00528632366072714,
        'hhinc#5': -0.012807975284603574,
        'hhinc#6': -0.009686302933787567,
        'totcost': -0.00492023540098787,
        'tottime': -0.05134209452571549,
    }
    full.set_values(**starting_values)
    full.loglike()
    full.maximize_loglike()

    # Constants-only model on the same data service, used as the base.
    constants_only = larch.Model(dataservice=full.dataservice)
    constants_only.choice_ca_var = '_choice_'
    constants_only.availability_var = '_avail_'
    alt_constants = (
        (2, "ASC_SR2"),
        (3, "ASC_SR3P"),
        (4, "ASC_TRAN"),
        (5, "ASC_BIKE"),
        (6, "ASC_WALK"),
    )
    for altcode, constant_name in alt_constants:
        constants_only.utility_co[altcode] = P(constant_name)
    constants_only.title = "Constants Only"
    constants_only.load_data()
    constants_only.loglike_null()
    constants_only.maximize_loglike()

    summary = joint_parameter_summary(
        [full, constants_only], bases=[constants_only]
    )
    stable_df(summary, 'joint_parameter_summary')
def test_overspec():
    """Estimate an over-specified model and check which parameters get flagged."""
    model = larch.Model.Example(1)
    model.utility_ca = model.utility_ca + P.failpar * X('1')
    model.utility_co[1] = P.ASC_DA
    model.lock_value('tottime', -0.1)
    shared_income = 'hhinc#23'
    model.utility_co[2] = P.ASC_SR2 + P(shared_income) * X.hhinc
    model.utility_co[3] = P.ASC_SR3P + P(shared_income) * X.hhinc
    model.remove_unused_parameters()
    model.load_data()

    # Only the fitted state of the model is needed, not the result object.
    model.maximize_loglike(quiet=True)
    model.calculate_parameter_covariance()

    report = model.possible_overspecification
    assert report.data.shape == (7, 2)
    flagged = [
        'ASC_BIKE',
        'ASC_DA',
        'ASC_SR2',
        'ASC_SR3P',
        'ASC_TRAN',
        'ASC_WALK',
        'failpar',
    ]
    assert all(report.data.index.sort_values() == flagged)
def linear_utility_from_spec(spec, x_col, p_col, ignore_x=(), segment_id=None):
    """
    Create a linear function from a spec DataFrame.

    Parameters
    ----------
    spec : pandas.DataFrame
        A spec for an ActivitySim model.
    x_col: str
        The name of the columns in spec representing the data.
    p_col: str or dict
        The name of the columns in spec representing the parameters.
        Give as a string for a single column, or as a dict to have
        segments on multiple columns. If given as a dict, the keys give
        the names of the columns to use, and the values give the
        identifiers that will need to match the loaded `segment_id` value.
    ignore_x : Collection, optional
        Labels in the spec file to ignore.  Typically this includes
        variables that are pre-processed by ActivitySim and therefore
        don't need to be made available in Larch.
    segment_id : str, optional
        The CHOOSER_SEGMENT_COLUMN_NAME identified for ActivitySim.
        This value is ignored if `p_col` is a string, and required if
        `p_col` is a dict.

    Returns
    -------
    LinearFunction_C
        The sum of ``P(param) * X(data)`` over the retained spec rows.
        When no row is retained the built-in ``sum`` yields the integer 0.

    Raises
    ------
    ValueError
        If `p_col` is a dict and `segment_id` is not given.
    """
    if isinstance(p_col, dict):
        if segment_id is None:
            raise ValueError('segment_id must be given if p_col is a dict')
        partial_utility = {}
        for seg_p_col, segval in p_col.items():
            # Each segment's utility only applies to rows where the segment
            # column equals that segment's identifier.
            partial_utility[seg_p_col] = linear_utility_from_spec(
                spec,
                x_col,
                seg_p_col,
                ignore_x,
            ) * X(f'{segment_id}=={segval}')
        return sum(partial_utility.values())

    # Pair the two columns directly instead of using itertuples() + getattr():
    # itertuples() renames any column that is not a valid Python identifier
    # (e.g. containing a space or starting with an underscore) to a
    # positional name, which made attribute lookup by column name fail.
    return sum(
        P(p) * X(x)
        for x, p in zip(spec[x_col], spec[p_col])
        if (x not in ignore_x) and not pd.isna(p)
    )
def apply_coef_template(linear_utility, template_col, condition=None):
    """
    Apply a coefficient template over a linear utility function.

    Each term's parameter name is split on ``*``; every piece found in
    `template_col` is replaced by its mapped value (pieces not found are
    kept as they are) and the pieces are joined with ``*`` again.

    Parameters
    ----------
    linear_utility : LinearFunction_C
        Iterable of terms exposing ``param``, ``data`` and ``scale``.
    template_col : Mapping
        Maps parameter names in the utility to their replacement names.
    condition : any
        When not None, the rebuilt function is multiplied by this value.

    Returns
    -------
    LinearFunction_C
        The rebuilt function (the integer 0 from the built-in ``sum`` when
        `linear_utility` has no terms).
    """

    def _templated_name(param_name):
        pieces = param_name.split("*")
        return "*".join(template_col.get(piece, piece) for piece in pieces)

    rebuilt_terms = [
        P(_templated_name(term.param)) * term.data * term.scale
        for term in linear_utility
    ]
    result = sum(rebuilt_terms)
    if condition is None:
        return result
    return result * condition
Bike = 4 Transit = 5 dfs = larch.DataFrames( co=df, alt_codes=[DA,SR,Walk,Bike,Transit], alt_names=['DA','SR','Walk','Bike','Transit'], ch_name='TOURMODE', ) # Model Definition m = larch.Model(dataservice=dfs) m.title = "Exampville Work Tour Mode Choice v1" from larch import P, X P('NamedParameter') X.NamedDataValue P('Named Parameter') X("log(INCOME)") P.InVehTime * X.AUTO_TIME + P.Cost * X.AUTO_COST m.utility_co[DA] = ( + P.InVehTime * X.AUTO_TIME + P.Cost * X.AUTO_COST # dollars per mile ) m.utility_co[SR] = ( + P.ASC_SR + P.InVehTime * X.AUTO_TIME + P.Cost * (X.AUTO_COST * 0.5) # dollars per mile, half share
def test_ch_av_summary_output():
    """Exercise ``choice_avail_summary`` with no, plausible and implausible choice/availability settings."""
    files = larch.exampville.files
    skims = larch.OMX(files.skims, mode='r')
    households = pandas.read_csv(files.hh)
    persons = pandas.read_csv(files.person)
    tours = pandas.read_csv(files.tour)

    person_columns = [
        'PERSONID', 'HHID', 'HHIDX', 'AGE', 'WORKS', 'N_WORK_TOURS',
        'N_OTHER_TOURS', 'N_TOURS', 'N_TRIPS', 'N_TRIPS_HBW', 'N_TRIPS_HBO',
        'N_TRIPS_NHB'
    ]

    merged = tours.merge(households, on='HHID')
    merged = merged.merge(persons[person_columns], on=('HHID', 'PERSONID'))
    # Zero-based zone indexes for the skim lookup.
    merged["HOMETAZi"] = merged["HOMETAZ"] - 1
    merged["DTAZi"] = merged["DTAZ"] - 1
    work_tours = merged[merged.TOURPURP == 1]

    od_skims = skims.get_rc_dataframe(work_tours.HOMETAZi, work_tours.DTAZi)
    f_tour = work_tours.join(od_skims)

    DA, SR, Walk, Bike, Transit = 1, 2, 3, 4, 5

    dfs = larch.DataFrames(
        co=f_tour,
        alt_codes=[DA, SR, Walk, Bike, Transit],
        alt_names=['DA', 'SR', 'Walk', 'Bike', 'Transit'],
    )

    m = larch.Model(dataservice=dfs)
    m.title = "Exampville Work Tour Mode Choice v1"

    def high_income(alt_label):
        # Alternative-specific high-income term.
        return P(f"HighInc:{alt_label}") * X("INCOME>75000")

    m.utility_co[DA] = (
        +P.InVehTime * X.AUTO_TIME
        + P.Cost * X.AUTO_COST  # dollars per mile
    )
    m.utility_co[SR] = (
        +P.ASC_SR
        + P.InVehTime * X.AUTO_TIME
        + P.Cost * (X.AUTO_COST * 0.5)  # dollars per mile, half share
        + high_income("SR")
    )
    m.utility_co[Walk] = (
        +P.ASC_Walk
        + P.NonMotorTime * X.WALK_TIME
        + high_income("Walk")
    )
    m.utility_co[Bike] = (
        +P.ASC_Bike
        + P.NonMotorTime * X.BIKE_TIME
        + high_income("Bike")
    )
    m.utility_co[Transit] = (
        +P.ASC_Transit
        + P.InVehTime * X.TRANSIT_IVTT
        + P.OutVehTime * X.TRANSIT_OVTT
        + P.Cost * X.TRANSIT_FARE
        + high_income("Transit")
    )

    expected_index = pandas.Index(
        [1, 2, 3, 4, 5, '< Total All Alternatives >'], dtype='object'
    )
    expected_names = ['DA', 'SR', 'Walk', 'Bike', 'Transit', '']
    expected_chosen = [6052., 810., 196., 72., 434., 7564.]
    # Availability conditions shared by both availability scenarios.
    non_da_availability = {
        SR: '1',
        Walk: 'WALK_TIME < 60',
        Bike: 'BIKE_TIME < 60',
        Transit: 'TRANSIT_FARE>0',
    }

    # No choice or avail data set
    m.load_data()
    q = m.dataframes.choice_avail_summary()
    assert numpy.array_equal(q.columns, ['name', 'chosen', 'available'])
    assert q.index.identical(expected_index)
    assert numpy.array_equal(q.values, [
        ['DA', None, None],
        ['SR', None, None],
        ['Walk', None, None],
        ['Bike', None, None],
        ['Transit', None, None],
        ['', 0, ''],
    ])

    # Reasonable choice and avail data set
    m.choice_co_code = 'TOURMODE'
    m.availability_co_vars = {DA: 'AGE >= 16', **non_da_availability}
    m.load_data()
    q = m.dataframes.choice_avail_summary()
    assert numpy.array_equal(q.columns, ['name', 'chosen', 'available'])
    assert q.index.identical(expected_index)
    assert numpy.array_equal(q['name'].values, expected_names)
    assert numpy.array_equal(q['chosen'].values, expected_chosen)
    assert numpy.array_equal(
        q['available'].values,
        numpy.array([7564.0, 7564.0, 4179.0, 7564.0, 4199.0, ''],
                    dtype=object))

    # Unreasonable choice and avail data set
    m.choice_co_code = 'TOURMODE'
    m.availability_co_vars = {DA: 'AGE >= 26', **non_da_availability}
    m.load_data()
    q = m.dataframes.choice_avail_summary()
    assert numpy.array_equal(
        q.columns, ['name', 'chosen', 'available', 'chosen but not available'])
    assert q.index.identical(expected_index)
    assert numpy.array_equal(q['name'].values, expected_names)
    assert numpy.array_equal(q['chosen'].values, expected_chosen)
    assert numpy.array_equal(
        q['available'].values,
        numpy.array([6376.0, 7564.0, 4179.0, 7564.0, 4199.0, ''],
                    dtype=object))
    assert numpy.array_equal(q['chosen but not available'].values,
                             [942.0, 0.0, 0.0, 0.0, 0.0, 942.0])
def test_simple_model_group():
    """A ModelGroup of two segment models must reproduce the pooled segmented model."""
    from larch.model.model_group import ModelGroup

    df = pd.read_csv(example_file("MTCwork.csv.gz"))
    df.set_index(['casenum', 'altnum'], inplace=True)
    d = larch.DataFrames(df, ch='chose', crack=True)
    d.set_alternative_names({
        1: 'DA',
        2: 'SR2',
        3: 'SR3+',
        4: 'Transit',
        5: 'Bike',
        6: 'Walk',
    })

    # Alternative code -> (constant name, income parameter name); identical
    # for the pooled model and for both segment models.
    co_terms = {
        2: ("ASC_SR2", "hhinc#2"),
        3: ("ASC_SR3P", "hhinc#3"),
        4: ("ASC_TRAN", "hhinc#4"),
        5: ("ASC_BIKE", "hhinc#5"),
        6: ("ASC_WALK", "hhinc#6"),
    }

    def set_utility_co(model):
        for altcode, (constant, income) in co_terms.items():
            model.utility_co[altcode] = P(constant) + P(income) * X("hhinc")

    # Pooled model with gender-specific time and cost parameters.
    m0 = larch.Model(dataservice=d)
    set_utility_co(m0)
    m0.utility_ca = (
        (P("tottime_m") * X("tottime") + P("totcost_m") * X("totcost"))
        * X("femdum == 0")
        + (P("tottime_f") * X("tottime") + P("totcost_f") * X("totcost"))
        * X("femdum == 1")
    )

    # Segment model on the ``femdum == 0`` cases.
    m1 = larch.Model(dataservice=d.selector_co("femdum == 0"))
    set_utility_co(m1)
    m1.utility_ca = (
        P("tottime_m") * X("tottime") + P("totcost_m") * X("totcost")
    )

    # Segment model on the ``femdum == 1`` cases.
    m2 = larch.Model(dataservice=d.selector_co("femdum == 1"))
    set_utility_co(m2)
    m2.utility_ca = (
        P("tottime_f") * X("tottime") + P("totcost_f") * X("totcost")
    )

    m0.load_data()
    assert m0.loglike2().ll == approx(-7309.600971749625)

    m1.load_data()
    assert m1.loglike2().ll == approx(-4068.8091617468717)

    m2.load_data()
    assert m2.loglike2().ll == approx(-3240.7918100027578)

    mg = ModelGroup([m1, m2])

    assert mg.loglike2().ll == approx(-7309.600971749625)
    assert mg.loglike() == approx(-7309.600971749625)

    pd.testing.assert_series_equal(
        mg.loglike2().dll.sort_index(),
        m0.loglike2().dll.sort_index(),
    )

    # After one BHHH step on the pooled model, the group must agree at the
    # same parameter values.
    m0.simple_step_bhhh()
    mg.set_values(**m0.pf.value)

    pd.testing.assert_series_equal(
        mg.loglike2().dll.sort_index(),
        m0.loglike2().dll.sort_index(),
    )

    assert mg.loglike2().ll == approx(-4926.4822036792275)
    assert mg.check_d_loglike().data.similarity.min() > 4

    result = mg.maximize_loglike(method='slsqp')
    assert result.loglike == approx(-3620.697668335103)

    # Groups assembled incrementally see the estimated member models.
    mg2 = ModelGroup([])
    for member in (m1, m2):
        mg2.append(member)
    assert mg2.loglike() == approx(-3620.697667552756)

    mg3 = ModelGroup([])
    for member in (m1, m2):
        mg3.append(member)
    mg3.doctor()
    assert mg3.loglike() == approx(-3620.697667552756)
def test_ref_gen():
    """Item, call and attribute access on ``X`` and ``P`` give equal references."""
    for factory in (X, P):
        by_item = factory["Asd"]
        by_call = factory("Asd")
        by_attr = factory.Asd
        assert by_item == by_call == by_attr

    # A data reference and a parameter reference of the same name differ.
    assert X.Asd != P.Asd
def test_pmath_in_utility():
    """Scaling a parameter in the utility must match scaling the data instead."""
    d = larch.examples.MTC()

    # (alternative code, constant name, income parameter name)
    co_terms = (
        (2, "ASC_SR2", "hhinc#2"),
        (3, "ASC_SR3P", "hhinc#3"),
        (4, "ASC_TRAN", "hhinc#4"),
        (5, "ASC_BIKE", "hhinc#5"),
        (6, "ASC_WALK", "hhinc#6"),
    )

    # m0 puts the multipliers on the parameters.
    m0 = larch.Model(dataservice=d)
    for altcode, constant, income in co_terms:
        m0.utility_co[altcode] = P(constant) * 10 + P(income) / 10 * X("hhinc")
    m0.utility_ca = (
        +P("nonmotorized_time") / 10. * X("(altnum>4) * tottime")
        + P("motorized_ovtt") * 10 * X("(altnum <= 4) * ovtt")
        + P("motorized_ivtt") * X("(altnum <= 4) * ivtt")
        + PX("totcost")
    )
    m0.availability_var = '_avail_'
    m0.choice_ca_var = '_choice_'

    # m1 puts the same multipliers inside the data expressions.
    m1 = larch.Model(dataservice=d)
    for altcode, constant, income in co_terms:
        m1.utility_co[altcode] = P(constant) * X('10') + P(income) * X("hhinc/10")
    m1.utility_ca = (
        +P("nonmotorized_time") * X("(altnum>4) * tottime / 10")
        + P("motorized_ovtt") * X("(altnum <= 4) * ovtt * 10")
        + P("motorized_ivtt") * X("(altnum <= 4) * ivtt")
        + PX("totcost")
    )
    m1.availability_var = '_avail_'
    m1.choice_ca_var = '_choice_'

    m0.load_data()
    m1.load_data()

    r0 = m0.maximize_loglike(quiet=True)
    r1 = m1.maximize_loglike(quiet=True)
    expected_loglike = -3587.6430040944942
    assert r0.loglike == pytest.approx(expected_loglike)
    assert r1.loglike == pytest.approx(expected_loglike)

    m0.calculate_parameter_covariance()
    m1.calculate_parameter_covariance()

    t = {
        'ASC_BIKE': -5.318650574990901,
        'ASC_SR2': -22.291563439182628,
        'ASC_SR3P': -22.174552606750527,
        'ASC_TRAN': -3.293923857045225,
        'ASC_WALK': 1.6172450189610719,
        'hhinc#2': -1.4000897138949544,
        'hhinc#3': 0.12900984170888324,
        'hhinc#4': -3.0601742475362923,
        'hhinc#5': -2.333410249527477,
        'hhinc#6': -3.048442130390144,
        'motorized_ivtt': -0.4116740527068954,
        'motorized_ovtt': -12.958446214791113,
        'nonmotorized_time': -11.789244777056298,
        'totcost': -20.19350165272386,
    }
    assert dict(m0.pf['t_stat']) == pytest.approx(t, rel=1e-5)
    assert dict(m1.pf['t_stat']) == pytest.approx(t, rel=1e-5)

    # The same ratio, computed from individual values and from a parameter
    # math expression handed to ``get_value``.
    expected_ratio = 0.3191492801963062
    for model in (m0, m1):
        by_values = (model.get_value(P.motorized_ivtt) * 60) / (
            model.get_value(P.totcost) * 100)
        assert by_values == pytest.approx(expected_ratio)
        by_expression = model.get_value(
            (P.motorized_ivtt * 60) / (P.totcost * 100))
        assert by_expression == pytest.approx(expected_ratio)
def test_piecewise_linear():
    """``piecewise_linear`` names one parameter per segment between the breaks."""
    from larch.util.data_expansion import piecewise_linear

    def check_pieces(func, param_stem, data_name):
        # Breaks [3, 5, 7] give four pieces, labelled with circled numbers.
        assert func[0] == (P(f'{param_stem} ① up to 3')
                           * X(f'piece({data_name},None,3)'))
        assert func[1] == (P(f'{param_stem} ② 3 to 5')
                           * X(f'piece({data_name},3,5)'))
        assert func[2] == (P(f'{param_stem} ③ 5 to 7')
                           * X(f'piece({data_name},5,7)'))
        assert func[3] == (P(f'{param_stem} ④ over 7')
                           * X(f'piece({data_name},7,None)'))
        assert len(func) == 4

    # Explicit parameter name.
    check_pieces(
        piecewise_linear(X.DataName, P.ParamName, [3, 5, 7]),
        'ParamName',
        'DataName',
    )

    # Parameter name defaults to the data name.
    check_pieces(
        piecewise_linear(X.DataName, breaks=[3, 5, 7]),
        'DataName',
        'DataName',
    )

    # A plain string is accepted as the data name.
    check_pieces(
        piecewise_linear('GenName', breaks=[3, 5, 7]),
        'GenName',
        'GenName',
    )

    # Passing the breaks positionally is rejected.
    with pytest.raises(ValueError):
        piecewise_linear('GenName', [3, 5, 7])
def location_choice_model(
    name="workplace_location",
    edb_directory="output/estimation_data_bundle/{name}/",
    coefficients_file="{name}_coefficients.csv",
    spec_file="{name}_SPEC.csv",
    size_spec_file="{name}_size_terms.csv",
    alt_values_file="{name}_alternatives_combined.csv",
    chooser_file="{name}_choosers_combined.csv",
    settings_file="{name}_model_settings.yaml",
    landuse_file="{name}_landuse.csv",
    return_data=False,
):
    """
    Build a location choice model from an estimation data bundle (EDB).

    Parameters
    ----------
    name : str
        Name of the model component.  It is substituted for ``{name}`` in
        the directory and file name templates, and (with the suffixes
        ``_location``, ``_destination``, ``_subtour`` and ``_tour``
        removed) it gives the default size term selector.
    edb_directory : str
        Template for the directory holding the bundle's files.
    coefficients_file, spec_file, size_spec_file, alt_values_file,
    chooser_file, landuse_file : str
        Templates for the CSV files read from `edb_directory`.
    settings_file : str
        Template for the YAML settings file read from `edb_directory`.
    return_data : bool, default False
        When True, also return the intermediate data that was loaded.

    Returns
    -------
    Model
        When `return_data` is False.
    tuple of (Model, Dict)
        When `return_data` is True; the Dict holds the EDB directory, the
        loaded tables, the specs, the model selector and the settings.

    Raises
    ------
    ValueError
        If the spec file has neither a ``Label`` nor an ``Expression``
        column.
    """
    # Derive the default size-term selector from the component name.
    model_selector = name.replace("_location", "")
    model_selector = model_selector.replace("_destination", "")
    model_selector = model_selector.replace("_subtour", "")
    model_selector = model_selector.replace("_tour", "")
    if model_selector == 'joint':
        model_selector = 'non_mandatory'
    edb_directory = edb_directory.format(name=name)

    def _read_csv(filename, **kwargs):
        # Fill in the component name and read relative to the EDB directory.
        filename = filename.format(name=name)
        return pd.read_csv(os.path.join(edb_directory, filename), **kwargs)

    coefficients = _read_csv(
        coefficients_file,
        index_col="coefficient_name",
    )
    spec = _read_csv(spec_file, comment="#")
    alt_values = _read_csv(alt_values_file)
    chooser_data = _read_csv(chooser_file)
    landuse = _read_csv(landuse_file, index_col="zone_id")
    master_size_spec = _read_csv(size_spec_file)

    # remove temp rows from spec, ASim uses them to calculate the other values written
    # to the EDB, but they are not actually part of the utility function themselves.
    spec = spec.loc[~spec.Expression.isna()]
    spec = spec.loc[~spec.Expression.str.startswith("_")].copy()

    settings_file = settings_file.format(name=name)
    with open(os.path.join(edb_directory, settings_file), "r") as yf:
        settings = yaml.load(
            yf,
            Loader=yaml.SafeLoader,
        )

    # Merge in a secondary settings file when one is named and exists.
    include_settings = settings.get("include_settings")
    if include_settings:
        include_settings = os.path.join(edb_directory, include_settings)
    if include_settings and os.path.exists(include_settings):
        with open(include_settings, "r") as yf:
            more_settings = yaml.load(
                yf,
                Loader=yaml.SafeLoader,
            )
        settings.update(more_settings)

    CHOOSER_SEGMENT_COLUMN_NAME = settings.get("CHOOSER_SEGMENT_COLUMN_NAME")
    SEGMENT_IDS = settings.get("SEGMENT_IDS")
    if SEGMENT_IDS is None:
        # Fall back to SEGMENTS, mapping each segment name to itself.
        # NOTE(review): if neither setting is present SEGMENT_IDS stays None,
        # and the later `.items()` / `len()` uses would fail -- confirm that
        # the settings always provide one of them when segments are in use.
        SEGMENTS = settings.get("SEGMENTS")
        if SEGMENTS is not None:
            SEGMENT_IDS = {i: i for i in SEGMENTS}

    SIZE_TERM_SELECTOR = settings.get('SIZE_TERM_SELECTOR', model_selector)

    # filter size spec for this location choice only
    size_spec = (master_size_spec.query(
        f"model_selector == '{SIZE_TERM_SELECTOR}'").drop(
            columns="model_selector").set_index("segment"))
    # keep only the land use columns with a positive size term somewhere
    size_spec = size_spec.loc[:, size_spec.max() > 0]

    size_coef = size_coefficients_from_spec(size_spec)

    indexes_to_drop = [
        "util_size_variable",  # pre-computed size (will be re-estimated)
        "util_size_variable_atwork",  # pre-computed size (will be re-estimated)
        "util_utility_adjustment",  # shadow pricing (ignored in estimation)
        "@df['size_term'].apply(np.log1p)",  # pre-computed size (will be re-estimated)
    ]
    # Only drop the entries actually present, and remember which column
    # identifies the spec rows.
    if 'Label' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Label.to_numpy()
        ]
        label_column_name = 'Label'
    elif 'Expression' in spec.columns:
        indexes_to_drop = [
            i for i in indexes_to_drop if i in spec.Expression.to_numpy()
        ]
        label_column_name = 'Expression'
    else:
        raise ValueError("cannot find Label or Expression in spec file")

    # Without a Label column, synthesize labels from the row position.
    expression_labels = None
    if label_column_name == 'Expression':
        expression_labels = {
            expr: f"variable_label{n:04d}"
            for n, expr in enumerate(spec.Expression.to_numpy())
        }

    # Remove shadow pricing and pre-existing size expression for re-estimation
    spec = (spec.set_index(label_column_name).drop(
        index=indexes_to_drop).reset_index())

    if label_column_name == 'Expression':
        # Apply the synthesized labels to both the spec and the alternative values.
        spec.insert(0, "Label", spec['Expression'].map(expression_labels))
        alt_values['variable'] = alt_values['variable'].map(expression_labels)
        label_column_name = "Label"

    if name == 'trip_destination':
        # Trip destination is segmented by primary purpose, using the spec
        # columns from the fourth one onward as the segments.
        CHOOSER_SEGMENT_COLUMN_NAME = 'primary_purpose'
        primary_purposes = spec.columns[3:]
        SEGMENT_IDS = {pp: pp for pp in primary_purposes}

    # The first chooser column is used as the chooser (case) index.
    chooser_index_name = chooser_data.columns[0]
    x_co = chooser_data.set_index(chooser_index_name)
    x_ca = cv_to_ca(
        alt_values.set_index([chooser_index_name, alt_values.columns[1]]))

    if CHOOSER_SEGMENT_COLUMN_NAME is not None:
        # label segments with names
        SEGMENT_IDS_REVERSE = {v: k for k, v in SEGMENT_IDS.items()}
        x_co["_segment_label"] = x_co[CHOOSER_SEGMENT_COLUMN_NAME].apply(
            lambda x: SEGMENT_IDS_REVERSE[x])
    else:
        x_co["_segment_label"] = size_spec.index[0]

    # compute total size values by segment
    for segment in size_spec.index:
        total_size_segment = pd.Series(0, index=landuse.index)
        x_co["total_size_" + segment] = 0
        for land_use_field in size_spec.loc[segment].index:
            total_size_segment += (landuse[land_use_field] *
                                   size_spec.loc[segment, land_use_field])
        # look up the total size of each chooser's observed choice zone
        x_co["total_size_" + segment] = total_size_segment.loc[
            x_co["override_choice"]].to_numpy()

    # for each chooser, collate the appropriate total size value
    x_co["total_size_segment"] = 0
    for segment in size_spec.index:
        labels = "total_size_" + segment
        rows = x_co["_segment_label"] == segment
        x_co.loc[rows, "total_size_segment"] = x_co[labels][rows]

    # Remove choosers with invalid observed choice (appropriate total size value = 0)
    valid_observed_zone = x_co["total_size_segment"] > 0
    x_co = x_co[valid_observed_zone]
    x_ca = x_ca[x_ca.index.get_level_values(chooser_index_name).isin(
        x_co.index)]

    # Merge land use characteristics into CA data
    try:
        x_ca_1 = pd.merge(x_ca, landuse, on="zone_id", how="left")
    except KeyError:
        # Missing the zone_id variable?
        # Use the alternative id's instead, which assumes no sampling of alternatives
        x_ca_1 = pd.merge(x_ca,
                          landuse,
                          left_on=x_ca.index.get_level_values(1),
                          right_index=True,
                          how="left")
    # restore the (chooser, alternative) index after the merge
    x_ca_1.index = x_ca.index

    # Availability of choice zones
    # A zone is unavailable when the "no attractions" indicator equals 1.
    if "util_no_attractions" in x_ca_1:
        av = x_ca_1["util_no_attractions"].apply(
            lambda x: False if x == 1 else True).astype(np.int8)
    elif "@df['size_term']==0" in x_ca_1:
        av = x_ca_1["@df['size_term']==0"].apply(
            lambda x: False if x == 1 else True).astype(np.int8)
    else:
        av = 1

    d = DataFrames(co=x_co, ca=x_ca_1, av=av)

    m = Model(dataservice=d)
    # A spec with a single coefficient column is unsegmented; otherwise one
    # parameter column per segment is used.
    if len(spec.columns) == 4 and all(
            spec.columns ==
        ['Label', 'Description', 'Expression', 'coefficient']):
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    elif len(spec.columns) == 4 \
            and all(spec.columns[:3] == ['Label', 'Description', 'Expression']) \
            and len(SEGMENT_IDS) == 1 \
            and spec.columns[3] == list(SEGMENT_IDS.values())[0]:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=spec.columns[-1],
            ignore_x=("local_dist", ),
        )
    else:
        m.utility_ca = linear_utility_from_spec(
            spec,
            x_col=label_column_name,
            p_col=SEGMENT_IDS,
            ignore_x=("local_dist", ),
            segment_id=CHOOSER_SEGMENT_COLUMN_NAME,
        )

    # Size (quantity) terms: one parameter per segment and land use field
    # with a non-zero size term.
    if CHOOSER_SEGMENT_COLUMN_NAME is None:
        assert len(size_spec) == 1
        m.quantity_ca = sum(
            P(f"{i}_{q}") * X(q) for i in size_spec.index
            for q in size_spec.columns if size_spec.loc[i, q] != 0)
    else:
        m.quantity_ca = sum(
            P(f"{i}_{q}") * X(q) *
            X(f"{CHOOSER_SEGMENT_COLUMN_NAME}=={str_repr(SEGMENT_IDS[i])}")
            for i in size_spec.index for q in size_spec.columns
            if size_spec.loc[i, q] != 0)

    apply_coefficients(coefficients, m)
    apply_coefficients(size_coef, m, minimum=-6, maximum=6)

    m.choice_co_code = "override_choice"

    if return_data:
        return (
            m,
            Dict(
                edb_directory=Path(edb_directory),
                alt_values=alt_values,
                chooser_data=chooser_data,
                coefficients=coefficients,
                landuse=landuse,
                spec=spec,
                size_spec=size_spec,
                master_size_spec=master_size_spec,
                model_selector=model_selector,
                settings=settings,
            ),
        )

    return m
def cdap_interaction_utility(model, n_persons, alts, interaction_coef,
                             coefficients):
    """
    Add the interaction terms to the utility of each activity pattern.

    Parameters
    ----------
    model : larch.Model
        The model whose `utility_co` entries are extended in place.
    n_persons : int
        Number of persons in the household patterns being modelled.
    alts : dict
        The keys are the names of the patterns, and the values are the
        alternative code numbers.
    interaction_coef : pandas.DataFrame
        Interaction coefficient table; the columns ``cardinality``,
        ``activity``, ``interaction_ptypes`` and ``coefficient`` are read.
    coefficients : pandas.DataFrame
        Only the index is read.  Names matching ``coef_[HMN]_.*`` replace
        the ``coefficient`` given in `interaction_coef` for the same
        activity and person type string.
    """
    person_numbers = list(range(1, n_persons + 1))

    # Map (activity, ptypes) -> coefficient name, taken from the coefficient
    # names themselves: the second "_"-separated piece is the activity and
    # every later piece is treated as a person-type string.
    matcher = re.compile("coef_[HMN]_.*")
    interact_coef_map = {}
    for c in coefficients.index:
        if matcher.search(c):
            c_split = c.split("_")
            for j in c_split[2:]:
                interact_coef_map[(c_split[1], j)] = c
                if all((i == 'x'
                        for i in j)):  # wildcards also map to empty
                    interact_coef_map[(c_split[1], '')] = c

    for (cardinality, activity), coefs in interaction_coef.groupby(
        ["cardinality", "activity"]):
        _logger.info(
            f"{n_persons} person households, interaction cardinality {cardinality}, activity {activity}"
        )
        if cardinality > n_persons:
            # the interaction involves more persons than the household has
            continue
        elif cardinality == n_persons:
            # Everyone takes part: the only matching pattern is the one in
            # which every person has this activity.
            this_aname = activity * n_persons
            this_altnum = alts[this_aname]
            for rowindex, row in coefs.iterrows():
                # one person-type condition per person, skipping "*" entries
                expression = "&".join(
                    f"(p{p}_ptype == {t})"
                    for (p, t) in zip(person_numbers, row.interaction_ptypes)
                    if t != "*")
                if expression:
                    if (activity, row.interaction_ptypes) in interact_coef_map:
                        linear_component = (
                            X(expression) * P(interact_coef_map[
                                (activity, row.interaction_ptypes)]))
                    else:
                        linear_component = X(expression) * P(row.coefficient)
                else:
                    # no person-type condition: the term is a bare parameter
                    if (activity, row.interaction_ptypes) in interact_coef_map:
                        linear_component = P(
                            interact_coef_map[(activity,
                                               row.interaction_ptypes)])
                    else:
                        linear_component = P(row.coefficient)
                _logger.debug(
                    f"utility_co[{this_altnum} {this_aname}] += {linear_component}"
                )
                model.utility_co[this_altnum] += linear_component
        elif cardinality < n_persons:
            # A subset of the household takes part: visit every combination
            # of persons of this size, and every pattern matching it.
            for combo in itertools.combinations(person_numbers, cardinality):
                # presumably a compiled regex over pattern names for this
                # combination and activity; interact_pattern is defined
                # elsewhere -- TODO confirm
                pattern = interact_pattern(n_persons, combo, activity)
                for aname, anum in alts.items():
                    if pattern.match(aname):
                        for rowindex, row in coefs.iterrows():
                            expression = "&".join(
                                f"(p{p}_ptype == {t})"
                                for (p, t) in zip(combo,
                                                  row.interaction_ptypes)
                                if t != "*")
                            # interaction terms without ptypes (i.e. with wildcards)
                            # only apply when the household size matches the cardinality
                            if expression != "":
                                if (activity, row.interaction_ptypes
                                    ) in interact_coef_map:
                                    linear_component = (
                                        X(expression) * P(interact_coef_map[
                                            (activity,
                                             row.interaction_ptypes)]))
                                else:
                                    linear_component = X(expression) * P(
                                        row.coefficient)
                                _logger.debug(
                                    f"utility_co[{anum} {aname}] += {linear_component}"
                                )
                                model.utility_co[anum] += linear_component
def _parameter_term(_p):
    """Turn one spec coefficient entry into a parameter, scaled if it is ``number*name``."""
    if not (isinstance(_p, str) and "*" in _p):
        return P(_p)

    factors = [piece.strip() for piece in _p.split("*")]
    if len(factors) != 2:
        # not handling triple-multiple terms (or worse)
        return P(_p)

    def _as_number(text):
        try:
            return float(text)
        except ValueError:
            return None

    left, right = (_as_number(piece) for piece in factors)
    if left is None and right is None:
        # neither side is a number, it's just a star in a name
        return P(_p)
    if left is None:
        # name * number
        return P(factors[0]) * right
    if right is None:
        # number * name
        return P(factors[1]) * left
    # both terms are numbers, not allowed
    raise ValueError(f"parameter is just {_p}, I need a name")


def linear_utility_from_spec(spec, x_col, p_col, ignore_x=(), segment_id=None):
    """
    Create a linear function from a spec DataFrame.

    Parameters
    ----------
    spec : pandas.DataFrame
        A spec for an ActivitySim model.
    x_col: str
        The name of the columns in spec representing the data.
    p_col: str or dict
        The name of the columns in spec representing the parameters.
        Give as a string for a single column, or as a dict to have
        segments on multiple columns. If given as a dict, the keys give
        the names of the columns to use, and the values give the
        identifiers that will need to match the loaded `segment_id` value.
    ignore_x : Collection, optional
        Labels in the spec file to ignore.  Typically this includes
        variables that are pre-processed by ActivitySim and therefore
        don't need to be made available in Larch.
    segment_id : str, optional
        The CHOOSER_SEGMENT_COLUMN_NAME identified for ActivitySim.
        This value is ignored if `p_col` is a string, and required if
        `p_col` is a dict.

    Returns
    -------
    LinearFunction_C
        The sum of the retained terms (the integer 0 from the built-in
        ``sum`` when no row is retained).

    Raises
    ------
    ValueError
        If `p_col` is a dict and `segment_id` is not given, or if a
        coefficient entry is a product of two numbers.
    """
    if isinstance(p_col, dict):
        if segment_id is None:
            raise ValueError("segment_id must be given if p_col is a dict")
        # One partial utility per segment column, switched on only where
        # the segment column equals that segment's identifier.
        return sum(
            linear_utility_from_spec(
                spec,
                x_col,
                seg_p_col,
                ignore_x,
            ) * X(f"{segment_id}=={str_repr(segval)}")
            for seg_p_col, segval in p_col.items()
        )

    terms = []
    for row in spec.index:
        _x = spec.loc[row, x_col]
        try:
            _x = _x.strip()
        except AttributeError:
            # Not a string: a NaN marks a missing label, anything else is
            # an error.
            if not np.isnan(_x):
                raise
            _x = None
        _p = spec.loc[row, p_col]
        if _x is None or _x in ignore_x or pd.isna(_p):
            continue
        # process coefficients when they are multiples instead of raw names
        terms.append(_parameter_term(_p) * X(_x))
    return sum(terms)
def mode_choice_model(
    name,
    edb_directory="output/estimation_data_bundle/{name}/",
    return_data=False,
    override_filenames=None,
):
    """
    Build a group of purpose-specific mode choice models from an estimation data bundle.

    Parameters
    ----------
    name : str
        Name of the model component; when it contains ``atwork`` a single
        ``atwork`` model using all choosers is built.
    edb_directory : str
        Directory (template) of the estimation data bundle, passed on to
        `simple_simulate_data`.
    return_data : bool, default False
        When True, also return the intermediate data that was loaded.
    override_filenames : dict, optional
        Extra keyword arguments passed on to `simple_simulate_data`.

    Returns
    -------
    ModelGroup
        When `return_data` is False.
    tuple of (ModelGroup, Dict)
        When `return_data` is True.
    """
    from larch.model.model_group import ModelGroup

    if override_filenames is None:
        override_filenames = {}
    data = simple_simulate_data(
        name=name,
        edb_directory=edb_directory,
        **override_filenames,
    )
    coefficients = data.coefficients
    coef_template = data.coef_template
    spec = data.spec
    settings = data.settings
    chooser_data = clean_values(
        data.chooser_data,
        alt_names_to_codes=data.alt_names_to_codes,
        choice_code="override_choice_code",
    )

    tree = construct_nesting_tree(data.alt_names, settings["NESTS"])

    # One model per template column, except that "atwork" is only modelled
    # (and then alone) when the component itself is an atwork model.
    is_atwork = "atwork" in name
    if is_atwork:
        purposes = ['atwork']
    else:
        purposes = list(coef_template.columns)
        if 'atwork' in purposes:
            purposes.remove('atwork')

    # Setup purpose specific models
    models = {purpose: Model(graph=tree, title=purpose) for purpose in purposes}

    for alt_code, alt_name in tree.elemental_names().items():
        # Read in base utility function for this alt_name
        base_utility = linear_utility_from_spec(
            spec,
            x_col="Label",
            p_col=alt_name,
            ignore_x=("#", ),
        )
        for purpose in purposes:
            # Modify utility function based on template for purpose
            templated_terms = [
                P(coef_template[purpose].get(term.param, term.param))
                * term.data
                * term.scale
                for term in base_utility
            ]
            models[purpose].utility_co[alt_code] = sum(templated_terms)

    for model in models.values():
        explicit_value_parameters(model)
    apply_coefficients(coefficients, models)

    avail = construct_availability(
        models[purposes[0]], chooser_data, data.alt_codes_to_names
    )

    dataframes = DataFrames(
        co=chooser_data,
        av=avail,
        alt_codes=data.alt_codes,
        alt_names=data.alt_names,
    )

    for purpose, model in models.items():
        if is_atwork:
            model.dataservice = dataframes
        else:
            # each purpose model only sees the tours of its own type
            model.dataservice = dataframes.selector_co(f"tour_type=='{purpose}'")
        model.choice_co_code = "override_choice_code"

    group = ModelGroup(models.values())

    if not return_data:
        return group

    loaded = Dict(
        edb_directory=Path(edb_directory),
        chooser_data=chooser_data,
        avail=avail,
        coefficients=coefficients,
        coef_template=coef_template,
        spec=spec,
        settings=settings,
    )
    return (group, loaded)
def test_linear_func():
    """Arithmetic on P, X and numbers builds the expected linear components."""
    assert LinearComponent_C(param="pname", data="dname") == P.pname * X.dname

    # Every element of a linear function is a LinearComponent_C, whichever
    # operand comes first.
    for position in (0, 1):
        assert type(list(P.singleton + P.pname * X.dname)[position]) is LinearComponent_C
    for position in (0, 1):
        assert type(list(+P.pname * X.dname + P.singleton)[position]) is LinearComponent_C

    # Negation flips the scale of every term.
    negated_sum = list(-(P.pname * X.dname + P.singleton))
    assert negated_sum == [
        LinearComponent_C('pname', 'dname', -1.0),
        LinearComponent_C('singleton', '1', -1.0),
    ]
    negated_difference = list(-(P.pname * X.dname - P.singleton))
    assert negated_difference == [
        LinearComponent_C('pname', 'dname', -1.0),
        LinearComponent_C('singleton', '1', 1.0),
    ]

    # Multiplying by data distributes over the terms.
    distributed = list((P.pname * X.dname - P.singleton) * X.Sss)
    assert distributed == [
        LinearComponent_C(param='pname', data='dname*Sss', scale=1.0),
        LinearComponent_C(param='singleton', data='Sss', scale=-1.0),
    ]

    summed = list(sum(PX(i) for i in ['Aaa', 'Bbb']))
    assert summed == [
        LinearComponent_C(param='Aaa', data='Aaa', scale=1.0),
        LinearComponent_C(param='Bbb', data='Bbb', scale=1.0),
    ]

    u = P.Aaa * X.Aaa + P.Bbb * X.Bbb
    u += P.Ccc * X.Ccc
    assert u == P.Aaa * X.Aaa + P.Bbb * X.Bbb + P.Ccc * X.Ccc

    # Parameter, data and scalar commute when chained left to right ...
    for chained in [
        P.ppp * 1.234 * X.xxx,
        X.xxx * P.ppp * 1.234,
        X.xxx * 1.234 * P.ppp,
        1.234 * X.xxx * P.ppp,
        1.234 * P.ppp * X.xxx,
    ]:
        assert P.ppp * X.xxx * 1.234 == chained

    # ... and under every explicit grouping.
    for left_side in (
        (P.ppp * X.xxx) * 1.234,
        (P.ppp * X.xxx * 1.234),
        P.ppp * (X.xxx * 1.234),
    ):
        for grouped in [
            P.ppp * (1.234 * X.xxx),
            X.xxx * (P.ppp * 1.234),
            X.xxx * (1.234 * P.ppp),
            1.234 * (X.xxx * P.ppp),
            1.234 * (P.ppp * X.xxx),
            (P.ppp * 1.234) * X.xxx,
            (X.xxx * P.ppp) * 1.234,
            (X.xxx * 1.234) * P.ppp,
            (1.234 * X.xxx) * P.ppp,
            (1.234 * P.ppp) * X.xxx,
        ]:
            assert left_side == grouped

    # Multiplying data into a component multiplies the data expressions; the
    # placeholder parameter "_" leaves the other parameter in place.
    squared = P.ppp * X('xxx*xxx')
    assert (P.ppp * X.xxx) * X.xxx == squared
    assert (P.ppp * X.xxx) * (P("_") * X.xxx) == squared
    assert (P("_") * X.xxx) * (P.ppp * X.xxx) == squared

    # Test squaring a boolean
    boolean_term = P.ppp * X('boolean(xxx)')
    assert (P.ppp * X('boolean(xxx)')) * X('boolean(xxx)') == boolean_term
    assert (P.ppp * X('boolean(xxx)')) * (
        P("_") * X('boolean(xxx)')) == boolean_term

    # Product of two linear functions expands term by term.
    product = (P.p1 * X.x1 + P.p2 * X.x2) * (
        P('_') * 1.1 * X.x1 + P('_') * 2 * X.x2)
    expanded = (
        P.p1 * 1.1 * X('x1*x1')
        + P.p1 * 2.0 * X('x1*x2')
        + P.p2 * 1.1 * X('x2*x1')
        + P.p2 * 2.0 * X('x2*x2')
    )
    assert product == expanded