Ejemplo n.º 1
0
def test__bfgs_optimization():
    """
    Ensure that the bfgs optimization properly processes the input for one
    iteration. The value of 0.4044 was computed by hand for
    comparison purposes
    """
    # X, y, N, J and K are module-level fixtures defined elsewhere in the file.
    X_, y_ = X.reshape(N, J, K), y.reshape(N, J)
    betas = np.array([.1, .1])
    model = MultinomialLogit()
    res = model._bfgs_optimization(betas, X_, y_, None, None, 0)

    # The value must be compared *against* the approx object. A bare
    # ``assert pytest.approx(a, b)`` never compares ``a`` with ``b``: the
    # second positional argument is the relative tolerance, and the assert
    # only evaluates the approx object itself in a boolean context.
    assert res['fun'] == pytest.approx(0.40443136)
Ejemplo n.º 2
0
def test_fit():
    """
    Ensures the log-likelihood works for a single iteration with the default
    initial coefficients. The value of 0.4044 was computed by hand for
    comparison purposes
    """
    # X, y, varnames, alts and ids are module-level fixtures defined elsewhere.
    model = MultinomialLogit()
    model.fit(X,
              y,
              varnames=varnames,
              alts=alts,
              ids=ids,
              maxiter=0,
              verbose=0)

    # Compare the fitted value against the approx object. The previous form
    # ``assert pytest.approx(value, expected)`` treated ``expected`` as the
    # relative tolerance and never performed a comparison.
    assert model.loglikelihood == pytest.approx(-0.40443136)
Ejemplo n.º 3
0
def test_log_likelihood():
    """
    Computes the log-likelihood "by hand" for a simple example and ensures
    that the one returned by xlogit is the same
    """
    # X, y, N, J and K are module-level fixtures defined elsewhere in the file.
    X_, y_ = X.reshape(N, J, K), y.reshape(N, J)
    betas = np.array([.1, .1])

    # Compute log likelihood using xlogit
    model = MultinomialLogit()
    obtained_loglik, _, _ = model._loglik_and_gradient(betas, X_, y_, None,
                                                       None)

    # Compute expected log likelihood "by hand": softmax probability of the
    # chosen alternative per observation, then the sum of the logs.
    eXB = np.exp(X_.dot(betas))
    expected_loglik = np.sum(
        np.log(np.sum(eXB / np.sum(eXB, axis=1, keepdims=True) * y_, axis=1)))

    # Compare the two values through the approx object. The previous form
    # ``assert pytest.approx(expected, obtained)`` used ``obtained`` as the
    # relative tolerance and never compared anything.
    # NOTE(review): if _loglik_and_gradient returns the *negative*
    # log-likelihood, ``expected_loglik`` must be negated -- confirm the sign
    # convention against the model implementation.
    assert obtained_loglik == pytest.approx(expected_loglik)
Ejemplo n.º 4
0
def example9_run():
    """Fit a multinomial logit with intercepts on the electricity dataset.

    Downloads ``electricity_long.csv`` from the xlogit GitHub repository,
    fits ``MultinomialLogit`` on six explanatory columns with
    ``fit_intercept=True`` and no individual-specific variables, then calls
    ``model.summary()``.
    """
    df = pd.read_csv(
        "https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/electricity_long.csv"
    )

    varnames = ["pf", "cl", "loc", "wk", "tod", "seas"]
    alt = [1, 2, 3, 4]
    # Seed NumPy's global RNG before fitting, as the original example did.
    np.random.seed(123)

    model = MultinomialLogit()
    model.fit(
        X=df[varnames],
        y=df['choice'].values,
        varnames=varnames,
        isvars=[],
        alts=alt,
        fit_intercept=True,
    )

    model.summary()
Ejemplo n.º 5
0
def example10_run():
    """Fit a multinomial logit on ``Final_HBW_WC_Long.csv``.

    Reads the CSV from ``xlogitprit/examples_prit/``, derives four columns
    that are non-zero only for selected alternatives, fits
    ``MultinomialLogit`` with intercepts (``hess=False``, ``gtol=1e-1``) and
    calls ``model.summary()``.
    """
    df = pd.read_csv("xlogitprit/examples_prit/Final_HBW_WC_Long.csv")

    # Rows whose alternative is one of 'w2pt', 'pr' or 'kr'; the three
    # columns below are zeroed for every other alternative.
    in_pt_alts = df['alt'].isin(['w2pt', 'pr', 'kr'])

    # Accessibility time
    df['ACT_PT'] = df['act'] * in_pt_alts
    # Waiting time
    df['WT_PT'] = df['wt'] * in_pt_alts
    df['EMP_DENS'] = df['emp_dens'] * in_pt_alts

    # Non-zero only for the 'cad' and 'pr' alternatives.
    df['ADUL_VEH'] = df['adul_veh'] * df['alt'].isin(['cad', 'pr'])

    varnames = ['tt', 'tc', 'ACT_PT', 'WT_PT', 'EMP_DENS', 'ADUL_VEH']

    model = MultinomialLogit()
    model.fit(
        X=df[varnames],
        y=df['Chosen_Mode'],
        varnames=varnames,
        isvars=[],
        alts=df['alt'],
        ids=df['TRIPID'],
        fit_intercept=True,
        hess=False,
        gtol=1e-1,
    )
    model.summary()
Ejemplo n.º 6
0
def example6_run():
    """Fit on 80% of the individuals in the fishing data and validate on the rest.

    Downloads ``fishing_long.csv`` from the xlogit GitHub repository, splits
    the rows by the ``id`` column into a random 80/20 train/test partition,
    fits ``MultinomialLogit`` on the training rows, calls ``model.summary()``
    and then ``model.validation_loglik`` on the held-out rows.
    """
    df = pd.read_csv("https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/fishing_long.csv")

    varnames = ['price', 'catch', 'income']
    isvarnames = ['income', 'catch']
    alts = [1, 2, 3, 4]
    X = df[varnames].values
    y = df['choice'].values

    # Sample the training individuals from the ids that actually occur in the
    # data. Drawing from ``range(N)`` (as before) silently assumes the ids are
    # exactly 0..N-1 and otherwise yields a training set of the wrong size.
    id_values = df['id'].values
    unique_ids = np.unique(id_values)
    training_size = int(0.8 * len(unique_ids))
    train_ids = np.random.choice(unique_ids, training_size, replace=False)

    # Vectorised membership test: one boolean per row, True for training rows.
    in_train = np.isin(id_values, train_ids)

    model = MultinomialLogit()
    model.fit(X[in_train], y[in_train], varnames=varnames, alts=alts,
              fit_intercept=True,
              isvars=isvarnames)
    model.summary()
    model.validation_loglik(X[~in_train], y[~in_train])
Ejemplo n.º 7
0
def example4_run():
    """Fit a Box-Cox transformed multinomial logit on the electricity dataset.

    Downloads ``electricity_long.csv`` from the xlogit GitHub repository and
    fits ``MultinomialLogit`` with ``transformation="boxcox"``, intercepts,
    at most 1000 iterations and ``gtol=1e-3``, then calls ``model.summary()``.
    """
    df = pd.read_csv("https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/electricity_long.csv")

    varnames = ["pf", "cl", "loc", "wk", "tod", "seas"]
    alts = [1, 2, 3, 4]
    maxiter = 1000

    # Seed NumPy's global RNG before fitting, as the original example did.
    np.random.seed(123)

    model = MultinomialLogit()
    model.fit(
        X=df[varnames].values,
        y=df['choice'].values,
        varnames=varnames,
        isvars=[],
        alts=alts,
        fit_intercept=True,
        transformation="boxcox",
        maxiter=maxiter,
        gtol=1e-3,
    )

    model.summary()
Ejemplo n.º 8
0
def example5_run():
    """Print the covariance of the electricity regressors and fit a logit.

    Downloads ``electricity_long.csv`` from the xlogit GitHub repository,
    prints the covariance matrix of the six explanatory columns, fits
    ``MultinomialLogit`` with intercepts and no individual-specific
    variables, then calls ``model.summary()``.
    """
    df = pd.read_csv("https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/electricity_long.csv")

    varnames = ["pf", "cl", "loc", "wk", "tod", "seas"]
    X = df[varnames].values
    y = df['choice'].values
    alt = [1, 2, 3, 4]
    # Seed NumPy's global RNG before fitting, as the original example did.
    np.random.seed(123)

    # np.cov treats rows as variables, hence the transpose of the
    # (observations x variables) matrix.
    print('covariance', np.cov(np.transpose(X)))

    model = MultinomialLogit()
    # ``varnames`` is deliberately kept as the third positional argument,
    # exactly as in the original call.
    model.fit(X, y,
              varnames,
              alts=alt,
              fit_intercept=True,
              isvars=[])
    model.summary()
Ejemplo n.º 9
0
def example13_run():
    """Fit a multinomial logit on randomly grouped alternative-specific columns.

    Reads ``Final_HBW_WC_Long.csv``, derives the ``ACT_PT``, ``WT_PT``,
    ``EMP_DENS`` and ``ADUL_VEH`` columns, lets ``df_coeff_col`` split every
    variable into columns restricted to randomly drawn groups of
    alternatives, then fits ``MultinomialLogit`` with intercepts on the
    resulting columns and calls ``model.summary()``.
    """
    df = pd.read_csv("xlogitprit/examples_prit/Final_HBW_WC_Long.csv")

    alt_var = df['alt']

    # Rows whose alternative is one of 'w2pt', 'pr' or 'kr'; the three
    # columns below are zeroed for every other alternative.
    in_pt_alts = alt_var.isin(['w2pt', 'pr', 'kr'])

    # Accessibility time
    df['ACT_PT'] = df['act'] * in_pt_alts
    # Waiting time
    df['WT_PT'] = df['wt'] * in_pt_alts
    df['EMP_DENS'] = df['emp_dens'] * in_pt_alts

    # Non-zero only for the 'cad' and 'pr' alternatives.
    df['ADUL_VEH'] = df['adul_veh'] * alt_var.isin(['cad', 'pr'])

    varnames = ['tt', 'tc', 'ACT_PT', 'WT_PT', 'EMP_DENS', 'ADUL_VEH']
    choice_set = ['cad', 'cap', 'w2pt', 'pr', 'kr', 'cycle', 'walk']

    def df_coeff_col(seed, dataframe, names_asvars, choiceset, var_alt):
        """Add randomly grouped alternative-specific columns to ``dataframe``.

        For every variable in ``names_asvars`` a random integer label is
        drawn per alternative; alternatives sharing a label form one group.
        Each group smaller than the full choice set gets a new column
        ``<var>_<alt1>_<alt2>...`` equal to the variable on rows whose
        ``var_alt`` value is in the group and zero elsewhere. A group that
        covers every alternative keeps the plain variable name.

        Returns the list of column names to use as model variables.
        ``dataframe`` is modified in place.
        """
        np.random.seed(seed)
        # One row per alternative, one column per variable.
        random_matrix = np.random.randint(1,
                                          len(choiceset) + 1,
                                          (len(choiceset), len(names_asvars)))

        # For each variable: group the alternative positions by their drawn
        # label (in order of first appearance) and map them to names.
        alt_groups = []
        for col in range(random_matrix.shape[1]):
            labels = random_matrix[:, col]
            grouped_positions = pd.Series(range(len(labels))).groupby(
                labels, sort=False).apply(list).tolist()
            alt_groups.append([[choiceset[pos] for pos in group]
                               for group in grouped_positions])

        # Create the dummy-interacted columns and collect the variable names.
        asvars_new = []
        for name, groups in zip(names_asvars, alt_groups):
            for group in groups:
                if len(group) < len(choiceset):
                    col_name = name + '_' + '_'.join(group)
                    dataframe[col_name] = dataframe[name] * np.isin(
                        var_alt, group)
                    asvars_new.append(col_name)
                else:
                    # Single group spanning all alternatives: generic
                    # coefficient, reuse the original column.
                    asvars_new.append(name)
        return asvars_new

    varnames = df_coeff_col(1, df, varnames, choice_set, alt_var)

    model = MultinomialLogit()
    model.fit(
        X=df[varnames],
        y=df['Chosen_Mode'],
        varnames=varnames,
        isvars=[],
        alts=alt_var,
        ids=df['TRIPID'],
        fit_intercept=True,
    )
    model.summary()
Ejemplo n.º 10
0
# Script fragment: fits a MultinomialLogit on a DataFrame ``df`` that must
# already exist when these lines run (it is created outside this snippet).
# To be provided by the user
choice_id = df['TRIPID']
ind_id = df['TRIPID']
varnames = ['tt', 'tc', 'ACT_PT', 'WT_PT', 'EMP_DENS', 'ADUL_VEH']
# asvarnames = ['tt','tc','ACT_PT', 'WT_PT', 'EMP_DENS','ADUL_VEH']
isvarnames = []
X = df[varnames].values
y = df['Chosen_Mode'].values
choice_set = ['cad', 'cap', 'w2pt', 'pr', 'kr', 'cycle', 'walk']
choice_var = df['Chosen_Mode']
alt_var = df['alt']
# randvars, R and Tol are defined here but not passed to the fit() call
# below (randvars only appears in a commented-out keyword).
randvars = {'EMP_DENS': 'n', 'WT_PT': 'u'}
R = 200
Tol = 1e-6

model = MultinomialLogit()
# init_coeff = [-2, -4, -3, -2, -1, -1, 0, 0, -0, -0.01, 0.0001, -1.15]
# Fit with intercepts and hess=False; the trailing comments on the keyword
# lines are disabled alternative settings.
model.fit(
    X=df[varnames],
    y=choice_var,
    varnames=varnames,  # init_coeff=np.repeat(.1, 11),
    isvars=isvarnames,
    alts=alt_var,
    ids=choice_id,  # gtol=1e-1,
    #   randvars=randvars,
    fit_intercept=True,
    hess=False
    #   gtol=1e-1,
    #   weights=[1, 1, 10, 10, 10, 100, 1]
)  #, init_coeff=init_coeff, tol=1e-2) #hess=False, grad=False)
model.summary()
Ejemplo n.º 11
0
                                     (df['alt'] == 'sm'))

# Age, non-zero only on rows of the 'train' alternative
df['age_train'] = df['AGE'] * (df['alt'] == 'train')

# Luggage, non-zero only on rows of the 'car' alternative
df['luggage_car'] = df['LUGGAGE'] * (df['alt'] == 'car')

# Seat configuration, non-zero only on rows of the 'sm' alternative
df['seats'] = df['seatconf'] * (df['alt'] == 'sm')

# Columns used as explanatory variables. Several of them (e.g. 'asc_train',
# 'he_sm_train', 'ga_sm_train') are not created in this fragment and are
# presumably built earlier in the script -- verify.
varnames = [
    'asc_train', 'asc_car', 'cost', 'time', 'luggage_car', 'he_sm_train',
    'seats', 'ga_sm_train', 'age_train'
]
model = MultinomialLogit()
# NOTE(review): ``weights`` is a length-2 vector of ones while this fragment
# references three alternatives ('train', 'car', 'sm') -- confirm the length
# the model expects.
model.fit(
    X=df[varnames],
    y=df['CHOICE'],
    varnames=varnames,
    alts=df['alt'],
    ids=df['custom_id'],
    avail=df['av'],
    weights=np.ones(2)
    # randvars={'cost': 'n'},
    # transvars=['luggage_car']
    #   init_coeff=np.random.normal(0, 1, 9)
    #   scipy_optimisation=False
    # method="L-BFGS-B"
    #   tol=1e-3
)
Ejemplo n.º 12
0
# df['tod'] = -df['tod']
# df['seas'] = -df['seas']
# print('sum', sum(np.where(df)))
# print("df['seas']", df['seas'])
# print(1/0)
# ``df`` and ``varnames`` are defined earlier in the script, outside this
# fragment.
X = df[varnames].values
y = df['choice'].values
choice_id = df['chid']
alt = [1, 2, 3, 4]
# Seed NumPy's global RNG.
np.random.seed(123)

# np.cov treats rows as variables, hence the transpose of X.
print('covariance', np.cov(np.transpose(X)))

# print(1/0)  # disabled debugging hard-stop

model = MultinomialLogit()
model.fit(
    X,
    y,
    varnames,
    alts=alt,
    #   randvars={'seas': 'ln', 'wk': 'n', 'pf': 'n', 'loc': 'n'},
    fit_intercept=True,
    #   transformation="boxcox",
    #   transvars=['wk', 'seas'],
    #   correlation=True,
    # ids=choice_id,
    #   panels=df.id.values,
    # tol=1e-4,
    # grad=False,
    # hess=False,
Ejemplo n.º 13
0
def example11_run():
    """Fit a multinomial logit with availability on the swissmetro data.

    Reads ``swissmetro_training.csv`` in wide format, reshapes it to long
    format (one row per observation and alternative), builds the
    alternative-specific columns, fits ``MultinomialLogit`` passing the
    ``av`` column as ``avail`` and calls ``model.summary()``.
    """
    df_wide = pd.read_csv("xlogitprit/examples/data/swissmetro_training.csv")
    df_wide['custom_id'] = np.arange(len(df_wide))  # Add unique identifier

    # Rename columns to the ``<stub>_<alt>`` pattern that wide_to_long needs.
    df_wide.rename(columns={
        "TRAIN_TT": "time_train",
        "SM_TT": "time_sm",
        "CAR_TT": "time_car",
        "TRAIN_CO": "cost_train",
        "SM_CO": "cost_sm",
        "CAR_CO": "cost_car",
        "TRAIN_HE": "headway_train",
        "SM_HE": "headway_sm",
        "SM_SEATS": "seatconf_sm",
        "TRAIN_AV": "av_train",
        "SM_AV": "av_sm",
        "CAR_AV": "av_car"
    },
                   inplace=True)

    # Convert from wide to long format using pandas. The suffix pattern is a
    # raw string: '\w' in a plain string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    df = pd.wide_to_long(
        df_wide, ["time", "cost", "headway", "seatconf", "av"],
        i="custom_id",
        j="alt",
        sep="_",
        suffix=r'\w+').sort_values(by=['custom_id', 'alt']).reset_index()
    df = df.fillna(0)  # Fill values missing for some alternatives

    # Format the outcome variable appropriately
    df["CHOICE"] = df["CHOICE"].map({1: 'train', 2: 'sm', 3: 'car'})
    # Convert CHOICE to True if alternative was selected; False otherwise
    df["CHOICE"] = df["CHOICE"] == df["alt"]

    # Create model specification
    is_train = df['alt'] == 'train'
    is_car = df['alt'] == 'car'
    is_sm = df['alt'] == 'sm'
    is_sm_or_train = df['alt'].isin(['train', 'sm'])

    # Alternative Specific Constants
    df['asc_train'] = np.ones(len(df)) * is_train
    df['asc_car'] = np.ones(len(df)) * is_car

    # GA for swissmetro and train
    df['ga_sm_train'] = df['GA'] * is_sm_or_train

    # Headway for swissmetro and train
    df['he_sm_train'] = df['headway'] * is_sm_or_train

    # Age for train
    df['age_train'] = df['AGE'] * is_train

    # Luggage for car
    df['luggage_car'] = df['LUGGAGE'] * is_car

    # Seat configuration for swissmetro ('sm')
    df['seats'] = df['seatconf'] * is_sm

    varnames = [
        'asc_train', 'asc_car', 'cost', 'time', 'luggage_car', 'he_sm_train',
        'seats', 'ga_sm_train', 'age_train'
    ]
    model = MultinomialLogit()
    model.fit(
        X=df[varnames],
        y=df['CHOICE'],
        varnames=varnames,
        alts=df['alt'],
        ids=df['custom_id'],
        avail=df['av'],
    )
    model.summary()
Ejemplo n.º 14
0
import pandas as pd
import numpy as np

from xlogitprit import MultinomialLogit

# Long-format fishing data hosted in the xlogit GitHub repository.
df = pd.read_csv("https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/fishing_long.csv")

# Model specification.
alts = [1, 2, 3, 4]
varnames = ['price', 'catch']
asvarnames = ['price', 'catch']
isvarnames = []
rand_vars = {'price': 'n'}  # defined but not passed to fit() below

X, y = df[varnames], df['choice']

# Fit without intercepts, marking both variables as ``transvars``.
model = MultinomialLogit()
model.fit(X, y,
          varnames=varnames,
          alts=alts,
          isvars=isvarnames,
          transvars=['price', 'catch'],
          fit_intercept=False)
model.summary()
Ejemplo n.º 15
0
    sep="_",
    suffix='\w+').sort_values(by=['custom_id', 'alt']).reset_index()

# Fill values that do not exist for some alternatives
df = df.fillna(0)
# Format the outcome variable appropriately
df["CHOICE"] = df["CHOICE"].map({1: 'train', 2: 'sm', 3: 'car'})
# Convert CHOICE to True if alternative was selected; False otherwise
df["CHOICE"] = df["CHOICE"] == df["alt"]
# Scale variables
df['time'] = df['time'] / 100
# 1 on 'train'/'sm' rows where GA == 1, else 0; cost is set to zero on those
# rows and divided by 100 everywhere else.
train_pass = ((df["GA"] == 1) & (df["alt"].isin(['train', 'sm']))).astype(int)
df['cost'] = df['cost'] * (train_pass == 0) / 100

# Create alternative specific constants
df['asc_train'] = np.ones(len(df)) * (df['alt'] == 'train')
df['asc_car'] = np.ones(len(df)) * (df['alt'] == 'car')

# NOTE(review): 'luggage_car', 'he_sm_train', 'seats', 'ga_sm_train' and
# 'age_train' are not created in the visible part of this script -- confirm
# they exist in ``df`` before this point.
varnames = [
    'asc_car', 'asc_train', 'cost', 'time', 'luggage_car', 'he_sm_train',
    'seats', 'ga_sm_train', 'age_train'
]
model = MultinomialLogit()
model.fit(X=df[varnames], y=df['CHOICE'], varnames=varnames, alts=df['alt'])
# Disabled MixedLogit variant of the same fit:
# model = MixedLogit()
# model.fit(X=df[varnames], y=df['CHOICE'], varnames=varnames, alts=df['alt'],
#         #   transvars=['cost'],
#           ids=df['custom_id'], avail=df['av'], randvars={'time': 'n'}, n_draws=2000,
#           # tol=1e-10
#           )
model.summary()
Ejemplo n.º 16
0
import pandas as pd
import time
import matplotlib.pyplot as plt

# User-supplied inputs: long-format electricity data from the pylogit
# GitHub repository.
df = pd.read_csv("https://raw.githubusercontent.com/timothyb0912/pylogit/master/examples/data/electricity_r_data_long.csv")

# Identifier, choice and alternative columns.
choice_id, ind_id = df['chid'], df['id']
choice_var, alt_var = df['choice'], df['alt']

# Model specification.
varnames = ['cl', 'loc', 'wk', 'tod', 'seas']
alternatives = [1, 2, 3, 4]

# Defined but not used by the fit() call below.
R = 200
dist = ['n', 'ln', 'tn', 'u', 't', 'f']

# Box-Cox transformation applied to 'cl' only, no intercepts.
model = MultinomialLogit()
model.fit(
    X=df[varnames],
    y=choice_var,
    varnames=varnames,
    alts=alt_var,
    ids=choice_id,
    transformation="boxcox",
    transvars=['cl'],
    fit_intercept=False,
)
model.summary()