Ejemplo n.º 1
0
def test_latent_class_full_data(swissmetro_raw_df):
    """Verify analytic gradients of a two-class latent class model.

    The class-membership model is attached to the same three-alternative
    data service as the class models, but is restricted to alternatives
    1 and 2 through ``alts``; those codes match the class-model keys.
    """
    from larch.model.latentclass import LatentClassModel

    data = larch.DataFrames(swissmetro_raw_df, alt_codes=[1, 2, 3])

    # Each class model receives its own copy of this mapping.
    availability = {1: "TRAIN_AV_SP", 2: "SM_AV", 3: "CAR_AV_SP"}

    # Class 1: utilities depend on cost only.
    cost_only = larch.Model(dataservice=data)
    cost_only.availability_co_vars = dict(availability)
    cost_only.choice_co_code = 'CHOICE'
    cost_only.utility_co[1] = (
        P("ASC_TRAIN") + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    cost_only.utility_co[2] = X("SM_CO*(GA==0)") * P("B_COST")
    cost_only.utility_co[3] = P("ASC_CAR") + X("CAR_CO") * P("B_COST")

    # Class 2: utilities depend on travel time and cost.
    time_and_cost = larch.Model(dataservice=data)
    time_and_cost.availability_co_vars = dict(availability)
    time_and_cost.choice_co_code = 'CHOICE'
    time_and_cost.utility_co[1] = (
        P("ASC_TRAIN")
        + X("TRAIN_TT") * P("B_TIME")
        + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    time_and_cost.utility_co[2] = (
        X("SM_TT") * P("B_TIME") + X("SM_CO*(GA==0)") * P("B_COST")
    )
    time_and_cost.utility_co[3] = (
        P("ASC_CAR")
        + X("CAR_TT") * P("B_TIME")
        + X("CAR_CO") * P("B_COST")
    )

    # Class-membership model over the two class ids.
    membership = larch.Model(dataservice=data, alts=[1, 2])
    membership.utility_co[2] = P.W_OTHER

    latent = LatentClassModel(membership, {1: cost_only, 2: time_and_cost})
    latent.load_data()

    starting_values = [
        (P.ASC_CAR, 0.125 / 2),
        (P.ASC_TRAIN, -0.398 / 2),
        (P.B_COST, -.0126 / 2),
        (P.B_TIME, -0.028 / 2),
        (P.W_OTHER, 1.095 / 2),
    ]
    for parameter, value in starting_values:
        latent.set_value(parameter, value)

    gradient_check = latent.check_d_loglike()

    expected_gradient = {
        'ASC_CAR': -81.69736186616234,
        'ASC_TRAIN': -613.131371089499,
        'B_COST': -6697.31706964777,
        'B_TIME': -40104.940072046316,
        'W_OTHER': 245.43145056623683,
    }
    assert dict(gradient_check.data.analytic) == approx(expected_gradient)
Ejemplo n.º 2
0
def test_latent_class_mixed_data(swissmetro_raw_df):
    """Expect ``ValueError`` when the membership model uses its own data.

    The two class models share one ``DataFrames`` object, while the
    class-membership model is built on a second, separately constructed
    ``DataFrames``; constructing the latent class model must then raise.
    """
    from larch.model.latentclass import LatentClassModel

    class_data = larch.DataFrames(swissmetro_raw_df, alt_codes=[1, 2, 3])

    # Each class model receives its own copy of this mapping.
    availability = {1: "TRAIN_AV_SP", 2: "SM_AV", 3: "CAR_AV_SP"}

    # Class 1: utilities depend on cost only.
    cost_only = larch.Model(dataservice=class_data)
    cost_only.availability_co_vars = dict(availability)
    cost_only.choice_co_code = 'CHOICE'
    cost_only.utility_co[1] = (
        P("ASC_TRAIN") + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    cost_only.utility_co[2] = X("SM_CO*(GA==0)") * P("B_COST")
    cost_only.utility_co[3] = P("ASC_CAR") + X("CAR_CO") * P("B_COST")

    # Class 2: utilities depend on travel time and cost.
    time_and_cost = larch.Model(dataservice=class_data)
    time_and_cost.availability_co_vars = dict(availability)
    time_and_cost.choice_co_code = 'CHOICE'
    time_and_cost.utility_co[1] = (
        P("ASC_TRAIN")
        + X("TRAIN_TT") * P("B_TIME")
        + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    time_and_cost.utility_co[2] = (
        X("SM_TT") * P("B_TIME") + X("SM_CO*(GA==0)") * P("B_COST")
    )
    time_and_cost.utility_co[3] = (
        P("ASC_CAR")
        + X("CAR_TT") * P("B_TIME")
        + X("CAR_CO") * P("B_COST")
    )

    # The membership model deliberately gets a different data service.
    membership_data = larch.DataFrames(swissmetro_raw_df, alt_codes=[1, 2])
    membership = larch.Model(dataservice=membership_data)
    membership.utility_co[2] = P.W_OTHER

    with raises(ValueError):
        LatentClassModel(membership, {1: cost_only, 2: time_and_cost})
Ejemplo n.º 3
0
def test_ch_av_summary_output():
    """Check ``choice_avail_summary`` under three model configurations.

    The Exampville work-tour mode choice model is summarised:

    1. with neither choice nor availability defined,
    2. with a choice column and availability rules that cover every
       observed choice, and
    3. with a stricter drive-alone rule, which adds a
       'chosen but not available' column to the summary.
    """
    hh = pandas.read_csv(larch.exampville.files.hh)
    pp = pandas.read_csv(larch.exampville.files.person)
    tour = pandas.read_csv(larch.exampville.files.tour)

    pp_col = [
        'PERSONID', 'HHID', 'HHIDX', 'AGE', 'WORKS', 'N_WORK_TOURS',
        'N_OTHER_TOURS', 'N_TOURS', 'N_TRIPS', 'N_TRIPS_HBW', 'N_TRIPS_HBO',
        'N_TRIPS_NHB'
    ]

    # Merge keys are given as a list, the documented multi-key form for `on`.
    raw = tour.merge(hh, on='HHID').merge(pp[pp_col], on=['HHID', 'PERSONID'])
    # Zero-based zone columns used for the skim lookup below.
    raw["HOMETAZi"] = raw["HOMETAZ"] - 1
    raw["DTAZi"] = raw["DTAZ"] - 1

    raw = raw[raw.TOURPURP == 1]

    # Close the skims file as soon as the needed values are joined onto the
    # tours, so the file handle is released even if the lookup fails.
    skims = larch.OMX(larch.exampville.files.skims, mode='r')
    try:
        f_tour = raw.join(skims.get_rc_dataframe(
            raw.HOMETAZi,
            raw.DTAZi,
        ))
    finally:
        skims.close()

    DA = 1
    SR = 2
    Walk = 3
    Bike = 4
    Transit = 5

    dfs = larch.DataFrames(
        co=f_tour,
        alt_codes=[DA, SR, Walk, Bike, Transit],
        alt_names=['DA', 'SR', 'Walk', 'Bike', 'Transit'],
    )

    m = larch.Model(dataservice=dfs)
    m.title = "Exampville Work Tour Mode Choice v1"

    m.utility_co[DA] = (
        +P.InVehTime * X.AUTO_TIME + P.Cost * X.AUTO_COST  # dollars per mile
    )

    m.utility_co[SR] = (
        +P.ASC_SR + P.InVehTime * X.AUTO_TIME + P.Cost *
        (X.AUTO_COST * 0.5)  # dollars per mile, half share
        + P("HighInc:SR") * X("INCOME>75000"))

    m.utility_co[Walk] = (+P.ASC_Walk + P.NonMotorTime * X.WALK_TIME +
                          P("HighInc:Walk") * X("INCOME>75000"))

    m.utility_co[Bike] = (+P.ASC_Bike + P.NonMotorTime * X.BIKE_TIME +
                          P("HighInc:Bike") * X("INCOME>75000"))

    m.utility_co[Transit] = (+P.ASC_Transit + P.InVehTime * X.TRANSIT_IVTT +
                             P.OutVehTime * X.TRANSIT_OVTT +
                             P.Cost * X.TRANSIT_FARE +
                             P("HighInc:Transit") * X("INCOME>75000"))

    # The summary index is the same in all three scenarios: one row per
    # alternative code plus a trailing totals row.
    expected_index = pandas.Index(
        [1, 2, 3, 4, 5, '< Total All Alternatives >'], dtype='object')
    expected_names = ['DA', 'SR', 'Walk', 'Bike', 'Transit', '']

    # No choice or avail data set
    m.load_data()
    q = m.dataframes.choice_avail_summary()
    assert numpy.array_equal(q.columns, ['name', 'chosen', 'available'])
    assert q.index.identical(expected_index)
    assert numpy.array_equal(q.values, [
        ['DA', None, None],
        ['SR', None, None],
        ['Walk', None, None],
        ['Bike', None, None],
        ['Transit', None, None],
        ['', 0, ''],
    ])

    # Reasonable choice and avail data set
    m.choice_co_code = 'TOURMODE'
    m.availability_co_vars = {
        DA: 'AGE >= 16',
        SR: '1',
        Walk: 'WALK_TIME < 60',
        Bike: 'BIKE_TIME < 60',
        Transit: 'TRANSIT_FARE>0',
    }
    m.load_data()
    q = m.dataframes.choice_avail_summary()
    assert numpy.array_equal(q.columns, ['name', 'chosen', 'available'])
    assert q.index.identical(expected_index)
    assert numpy.array_equal(q['name'].values, expected_names)
    assert numpy.array_equal(q['chosen'].values,
                             [6052., 810., 196., 72., 434., 7564.])
    assert numpy.array_equal(
        q['available'].values,
        numpy.array([7564.0, 7564.0, 4179.0, 7564.0, 4199.0, ''],
                    dtype=object))

    # Unreasonable choice and avail data set
    m.choice_co_code = 'TOURMODE'
    m.availability_co_vars = {
        DA: 'AGE >= 26',
        SR: '1',
        Walk: 'WALK_TIME < 60',
        Bike: 'BIKE_TIME < 60',
        Transit: 'TRANSIT_FARE>0',
    }
    m.load_data()
    q = m.dataframes.choice_avail_summary()
    assert numpy.array_equal(
        q.columns, ['name', 'chosen', 'available', 'chosen but not available'])
    assert q.index.identical(expected_index)
    assert numpy.array_equal(q['name'].values, expected_names)
    assert numpy.array_equal(q['chosen'].values,
                             [6052., 810., 196., 72., 434., 7564.])
    assert numpy.array_equal(
        q['available'].values,
        numpy.array([6376.0, 7564.0, 4179.0, 7564.0, 4199.0, ''],
                    dtype=object))
    assert numpy.array_equal(q['chosen but not available'].values,
                             [942.0, 0.0, 0.0, 0.0, 0.0, 942.0])
Ejemplo n.º 4
0
def test_latent_class(swissmetro_raw_df):
    """Check gradients and log-likelihood of a two-class latent class model.

    The class-membership model is created without a data service of its
    own.  Gradients are checked at two parameter points: one at half the
    reference values and one at the reference values themselves, where the
    log-likelihood is also asserted.
    """
    from larch.model.latentclass import LatentClassModel

    data = larch.DataFrames(swissmetro_raw_df, alt_codes=[1, 2, 3])

    # Each class model receives its own copy of this mapping.
    availability = {1: "TRAIN_AV_SP", 2: "SM_AV", 3: "CAR_AV_SP"}

    # Class 1: utilities depend on cost only.
    cost_only = larch.Model(dataservice=data)
    cost_only.availability_co_vars = dict(availability)
    cost_only.choice_co_code = 'CHOICE'
    cost_only.utility_co[1] = (
        P("ASC_TRAIN") + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    cost_only.utility_co[2] = X("SM_CO*(GA==0)") * P("B_COST")
    cost_only.utility_co[3] = P("ASC_CAR") + X("CAR_CO") * P("B_COST")

    # Class 2: utilities depend on travel time and cost.
    time_and_cost = larch.Model(dataservice=data)
    time_and_cost.availability_co_vars = dict(availability)
    time_and_cost.choice_co_code = 'CHOICE'
    time_and_cost.utility_co[1] = (
        P("ASC_TRAIN")
        + X("TRAIN_TT") * P("B_TIME")
        + X("TRAIN_CO*(GA==0)") * P("B_COST")
    )
    time_and_cost.utility_co[2] = (
        X("SM_TT") * P("B_TIME") + X("SM_CO*(GA==0)") * P("B_COST")
    )
    time_and_cost.utility_co[3] = (
        P("ASC_CAR")
        + X("CAR_TT") * P("B_TIME")
        + X("CAR_CO") * P("B_COST")
    )

    # Membership model with no data service attached.
    membership = larch.Model()
    membership.utility_co[2] = P.W_OTHER

    latent = LatentClassModel(membership, {1: cost_only, 2: time_and_cost})
    latent.load_data()

    # --- First point: half of the reference parameter values. ---
    half_values = [
        (P.ASC_CAR, 0.125 / 2),
        (P.ASC_TRAIN, -0.398 / 2),
        (P.B_COST, -.0126 / 2),
        (P.B_TIME, -0.028 / 2),
        (P.W_OTHER, 1.095 / 2),
    ]
    for parameter, value in half_values:
        latent.set_value(parameter, value)

    first_check = latent.check_d_loglike()

    expected_first = {
        'ASC_CAR': -81.69736186616234,
        'ASC_TRAIN': -613.131371089499,
        'B_COST': -6697.31706964777,
        'B_TIME': -40104.940072046316,
        'W_OTHER': 245.43145056623683,
    }
    assert dict(first_check.data.analytic) == approx(expected_first)
    assert first_check.data.similarity.min() > 4

    # --- Second point: the reference parameter values. ---
    reference_values = [
        (P.ASC_CAR, 0.125),
        (P.ASC_TRAIN, -0.398),
        (P.B_COST, -.0126),
        (P.B_TIME, -0.028),
        (P.W_OTHER, 1.095),
    ]
    for parameter, value in reference_values:
        latent.set_value(parameter, value)

    assert latent.loglike() == approx(-5208.502259337974)

    second_check = latent.check_d_loglike()

    expected_second = {
        'ASC_CAR': 0.6243716033364302,
        'ASC_TRAIN': 0.9297965389102578,
        'B_COST': -154.03997923797007,
        'B_TIME': 76.19297915128493,
        'W_OTHER': -0.7936963902343083,
    }
    assert dict(second_check.data.analytic) == approx(expected_second)

    # similarity is a bit lower very close to the optimum
    assert second_check.data.similarity.min() > 2
Ejemplo n.º 5
0
def test_simple_model_group():
    """Compare a ``ModelGroup`` of two segment models with a pooled model.

    ``pooled`` covers all cases and interacts the time and cost parameters
    with ``femdum``; ``male`` and ``female`` are fitted on the
    ``femdum == 0`` and ``femdum == 1`` selections with the matching
    parameters.  The group of the two segment models is asserted to give
    the same log-likelihood and gradient as the pooled model.
    """
    from larch.model.model_group import ModelGroup

    df = pd.read_csv(example_file("MTCwork.csv.gz"))
    df = df.set_index(['casenum', 'altnum'])
    d = larch.DataFrames(df, ch='chose', crack=True)
    alternative_names = {
        1: 'DA',
        2: 'SR2',
        3: 'SR3+',
        4: 'Transit',
        5: 'Bike',
        6: 'Walk',
    }
    d.set_alternative_names(alternative_names)

    # (alternative code, constant name, income parameter name)
    co_terms = (
        (2, "ASC_SR2", "hhinc#2"),
        (3, "ASC_SR3P", "hhinc#3"),
        (4, "ASC_TRAN", "hhinc#4"),
        (5, "ASC_BIKE", "hhinc#5"),
        (6, "ASC_WALK", "hhinc#6"),
    )

    def assign_utility_co(model):
        # All three models share the same alternative-specific terms.
        for code, constant, income in co_terms:
            model.utility_co[code] = P(constant) + P(income) * X("hhinc")

    def male_terms():
        # Fresh expression on every call so no objects are shared.
        return P("tottime_m") * X("tottime") + P("totcost_m") * X("totcost")

    def female_terms():
        # Fresh expression on every call so no objects are shared.
        return P("tottime_f") * X("tottime") + P("totcost_f") * X("totcost")

    pooled = larch.Model(dataservice=d)
    assign_utility_co(pooled)
    pooled.utility_ca = (
        male_terms() * X("femdum == 0") + female_terms() * X("femdum == 1")
    )

    male = larch.Model(dataservice=d.selector_co("femdum == 0"))
    assign_utility_co(male)
    male.utility_ca = male_terms()

    female = larch.Model(dataservice=d.selector_co("femdum == 1"))
    assign_utility_co(female)
    female.utility_ca = female_terms()

    pooled.load_data()
    assert pooled.loglike2().ll == approx(-7309.600971749625)

    male.load_data()
    assert male.loglike2().ll == approx(-4068.8091617468717)

    female.load_data()
    assert female.loglike2().ll == approx(-3240.7918100027578)

    group = ModelGroup([male, female])

    assert group.loglike2().ll == approx(-7309.600971749625)
    assert group.loglike() == approx(-7309.600971749625)

    pd.testing.assert_series_equal(
        group.loglike2().dll.sort_index(),
        pooled.loglike2().dll.sort_index(),
    )

    # Take one BHHH step on the pooled model and copy its values over.
    pooled.simple_step_bhhh()
    group.set_values(**pooled.pf.value)

    pd.testing.assert_series_equal(
        group.loglike2().dll.sort_index(),
        pooled.loglike2().dll.sort_index(),
    )

    assert group.loglike2().ll == approx(-4926.4822036792275)
    assert group.check_d_loglike().data.similarity.min() > 4

    result = group.maximize_loglike(method='slsqp')
    assert result.loglike == approx(-3620.697668335103)

    # A group assembled by appending the same two models.
    appended = ModelGroup([])
    appended.append(male)
    appended.append(female)
    assert appended.loglike() == approx(-3620.697667552756)

    # Same again, with ``doctor()`` called before evaluating.
    doctored = ModelGroup([])
    doctored.append(male)
    doctored.append(female)
    doctored.doctor()
    assert doctored.loglike() == approx(-3620.697667552756)
Ejemplo n.º 6
0
    skims.get_rc_dataframe(
        df["HOMETAZi"], df["DTAZi"],
    )
)


# For clarity, we can define numbers as names for modes
DA = 1
SR = 2
Walk = 3
Bike = 4
Transit = 5

# Wrap the merged table `df` as the data source, registering the five mode
# codes above together with matching display names.
dfs = larch.DataFrames(
    co=df,
    alt_codes=[DA,SR,Walk,Bike,Transit],
    alt_names=['DA','SR','Walk','Bike','Transit'],
    ch_name='TOURMODE',  # presumably the column holding the observed choice -- confirm
)

# Model Definition
m = larch.Model(dataservice=dfs)
m.title = "Exampville Work Tour Mode Choice v1"

from larch import P, X
# The bare expressions below are illustrative only: their results are not
# assigned anywhere.  They show that P and X references can be written either
# as attribute access or as a call with a string; the call form is needed when
# the name is not a valid identifier (contains a space or is an expression).
P('NamedParameter')
X.NamedDataValue
P('Named Parameter')
X("log(INCOME)")
# References combine with * and + into a single expression.
P.InVehTime * X.AUTO_TIME + P.Cost * X.AUTO_COST

m.utility_co[DA] = (