Exemple #1
0
    def validate(self, estimationResults, slices=5):
        """Perform out-of-sample validation.

        The function performs the following tasks:

          - it shuffles the data set,
          - it splits the data set into slices of (approximately) the same size,
          - each slice defines a validation set (the slice itself)
            and an estimation set (the rest of the data),
          - the model is re-estimated on the estimation set,
          - the estimated model is applied on the validation set,
          - the value of the log likelihood for each observation is reported.

        If ``self.generatePickle`` is set, the list of results is also
        dumped to a new ``<modelName>_validation`` pickle file.

        :param estimationResults: results of the model estimation based on the full data.
        :type estimationResults: biogeme.results.bioResults

        :param slices: number of slices.
        :type slices: int

        :return: a list containing as many items as slices. Each item
                 is the result of the simulation on the validation set.
        :rtype: list(pandas.DataFrame)

        :raise biogemeError: if the database contains panel data.
        """
        if self.database.isPanel():
            raise excep.biogemeError(
                'Validation for panel data is not yet implemented')
        # Split the database
        validationData = self.database.split(slices)

        keepDatabase = self.database

        allSimulationResults = []
        try:
            for v in validationData:
                # v[0] is the estimation data set, v[1] the validation one
                self.database = db.Database('Estimation data', v[0])
                self.loglike.changeInitValues(
                    estimationResults.getBetaValues())
                results = self.estimate()
                simulate = {'Loglikelihood': self.loglike}
                simBiogeme = BIOGEME(db.Database('Validation data', v[1]),
                                     simulate)
                simResult = simBiogeme.simulate(results.getBetaValues())
                allSimulationResults.append(simResult)
        finally:
            # Always put the full database back, even when an estimation or
            # a simulation fails, so the object is not left pointing at a
            # partial estimation sample.
            self.database = keepDatabase
        if self.generatePickle:
            fname = f'{self.modelName}_validation'
            pickleFileName = bf.getNewFileName(fname, 'pickle')
            with open(pickleFileName, 'wb') as f:
                pickle.dump(allSimulationResults, f)
            self.logger.general(
                f'Simulation results saved in file {pickleFileName}')

        return allSimulationResults
Exemple #2
0
    def prep_db(self):
        """Create ``self.database`` and expose each column as an attribute.

        The data come from ``self.df`` when it is not ``None``; otherwise the
        file ``GEV_SM/swissmetro.dat`` is read from ``self.data_folder``.
        For every column of the database, an attribute with the column's
        name is set on ``self`` and bound to ``Variable(<column name>)``.
        """
        if self.df is not None:
            frame = self.df
        else:
            frame = pd.read_table(self.data_folder + 'GEV_SM/swissmetro.dat')

        self.database = db.Database('swissmetro', frame)

        for col in self.database.data.columns:
            # setattr instead of exec on a formatted string: no code is built
            # from column names, and names that are not valid Python
            # identifiers no longer raise a SyntaxError.
            setattr(self, col, Variable(col))
Exemple #3
0
    def evaluate_model(self, pandas_df_for_specified_country, model):
        """Simulate ``model`` on a data set and summarise how well it predicts.

        ``model.structure`` is simulated with ``model.betas`` on the given
        data frame. For every row, the column with the largest simulated
        value is taken as the simulated choice and compared with the
        ``user_choice`` column of the input data.

        :param pandas_df_for_specified_country: data to simulate on; must
            contain a ``user_choice`` column.
        :param model: object providing ``structure``, ``betas`` and
            ``results`` (the latter with ``getGeneralStatistics()``).
        :return: dict with two formatted strings: ``'Model prediction
            accuracy'`` (share of rows predicted correctly) and
            ``'Rho-square'`` (read from the general statistic
            ``'Rho-square-bar for the init. model'``).
        :rtype: dict
        """

        def print_mode_shares(modename, modenumber):
            # Debug helper: print the observed and simulated share of one
            # alternative. Vectorised comparison instead of a row-wise apply.
            total = simresults.shape[0]
            real = int((simresults['Actual choice'] == modenumber).sum())
            simu = int((simresults['Simulated choice'] == modenumber).sum())
            shares = (modename, '--> Real:' + "{0:.1%}".format(real / total) +
                      '| Simu:' + "{0:.1%}".format(simu / total))
            print(shares)

        # Estimation of probabilities for each alternative on aggregate.
        # Simulate / forecast.
        biosim = bio.BIOGEME(db.Database('estimationdb', pandas_df_for_specified_country), model.structure)
        biosim.modelName = "simulated_model"
        simresults = biosim.simulate(model.betas)

        # Add a column containing the suggestion from the model
        simresults['Simulated choice'] = simresults.idxmax(axis=1)
        # Add a column containing the actual choice of the individual
        simresults['Actual choice'] = pandas_df_for_specified_country['user_choice'].to_numpy()
        # Add a column which compares the predicted against the RP choice (correct prediction = 1, wrong prediction = 0)
        simresults['Correct prediction'] = np.where(simresults['Simulated choice'] == simresults['Actual choice'], 1, 0)

        # Uncomment to print the per-alternative shares while debugging:
        # print_mode_shares('Depart earlier', 1)
        # print_mode_shares('Depart on-time', 2)
        # print_mode_shares('Depart later  ', 3)

        return {'Model prediction accuracy': "{0:.1%}".format(simresults['Correct prediction'].mean()),
                'Rho-square': "{0:.3}".format(model.results.getGeneralStatistics()['Rho-square-bar for the init. model'][0])}
Exemple #4
0
    def predict(self, trip_data, model_for_specified_country):
        """Simulate the model on a trip and return the first row's values.

        ``trip_data`` is modified in place: indicator columns ``OCC_1`` to
        ``OCC_6`` (1 when ``user_occupation`` equals the code, else 0) and an
        ``AGE`` column derived from ``user_birthday`` are added before the
        simulation.

        :param trip_data: data frame describing the trip(s); only the first
            row of the simulation output is returned.
        :param model_for_specified_country: object providing ``structure``
            (formulas to simulate) and ``betas`` (parameter values).
        :return: dict with keys ``mod_car``, ``mod_pt`` and ``mod_motbike``
            holding the first three simulated values of the first row
            (presumably the probabilities of each mode).
        :rtype: dict
        """
        # One 0/1 indicator column per occupation code 1..6.
        for occupation in range(1, 7):
            matches = trip_data['user_occupation'] == occupation
            trip_data['OCC_' + str(occupation)] = np.where(matches, 1, 0)

        birthdays = trip_data['user_birthday']
        trip_data['AGE'] = self.__birthday_to_age(birthdays)

        # Biogeme only works on its own database wrapper.
        model = model_for_specified_country
        simulator = bio.BIOGEME(db.Database("SuggestionDB", trip_data),
                                model.structure)
        simulator.modelName = "suggestion_to_user"
        simulated = simulator.simulate(model.betas)

        # Name of the column with the largest simulated value: this is the
        # My-TRAC recommendation, stored as an additional column.
        simulated['Recommendation'] = simulated.idxmax(axis=1)

        first_row = simulated.values[0]
        return {
            'mod_car': first_row[0],
            'mod_pt': first_row[1],
            'mod_motbike': first_row[2],
        }
    def predict(self, trip_data, model_for_specified_country):
        """Simulate the model on a trip and return the first row's values.

        ``trip_data`` is modified in place: an ``AGE`` column derived from
        ``user_birthday`` is added before the simulation.

        :param trip_data: data frame describing the trip(s); only the first
            row of the simulation output is returned.
        :param model_for_specified_country: object providing ``structure``
            (formulas to simulate) and ``betas`` (parameter values).
        :return: dict with keys ``tod_earlier``, ``tod_ontime`` and
            ``tod_later`` holding the first three simulated values of the
            first row (presumably the departure-time probabilities).
        :rtype: dict
        """
        birthdays = trip_data['user_birthday']
        trip_data['AGE'] = self.__birthday_to_age(birthdays)

        # Biogeme only works on its own database wrapper.
        model = model_for_specified_country
        simulator = bio.BIOGEME(db.Database("SuggestionDB", trip_data),
                                model.structure)
        simulator.modelName = "suggestion_to_user"
        simulated = simulator.simulate(model.betas)

        # Name of the column with the largest simulated value: this is the
        # My-TRAC recommendation, stored as an additional column.
        simulated['Recommendation'] = simulated.idxmax(axis=1)

        first_row = simulated.values[0]
        return {
            'tod_earlier': first_row[0],
            'tod_ontime': first_row[1],
            'tod_later': first_row[2],
        }
Exemple #6
0
    def prep_db(self):
        """Load the London travel data and expose each column as an attribute.

        The CSV file ``LondonTravel/<self.file>`` is read from
        ``self.data_folder`` and wrapped in ``self.database``. For every
        column of the database, an attribute with the column's name is set
        on ``self`` and bound to ``Variable(<column name>)``.
        """
        frame = pd.read_csv(self.data_folder + 'LondonTravel/' + self.file)

        self.database = db.Database('LondonTravel', frame)

        for col in self.database.data.columns:
            # setattr instead of exec on a formatted string: no code is built
            # from column names, and names that are not valid Python
            # identifiers no longer raise a SyntaxError.
            setattr(self, col, Variable(col))
Exemple #7
0
def train_MNL(data):
    """Estimate the "MNL_SGP" logit model on ``data`` and return its betas.

    ``data`` is modified in place: a ``<mode>_avail`` column set to 1 is
    added for every mode in ``modes_list``. The utility of each alternative
    in ``att`` is its alternative-specific constant plus one term per
    variable; the variables in ``z_vars`` enter every alternative except
    'Walk'. The HTML and pickle files written by the estimation are deleted.

    :param data: data frame with the explanatory variables and a ``choice``
        column.
    :return: dict mapping each parameter name to its estimated value.
    :rtype: dict
    """
    # Every mode is declared always available.
    for mode in modes_list:
        data[mode + '_avail'] = 1
    database = db.Database("MNL_SGP", data)

    def free_beta(name):
        # Parameter to be estimated, starting at 0, without bounds.
        return bioexp.Beta(name, 0, None, None, 0)

    def variables_of(alternative):
        # Variables entering the utility of one alternative, in the order
        # in which their terms are added.
        own = list(att[alternative])
        if alternative != 'Walk':
            return list(z_vars) + own
        return own

    asc_walk = bioexp.Beta('B___ASC___Walk', 0, None, None, 1)  # fixed
    asc_pt = free_beta('B___ASC___PT')
    asc_ridehail = free_beta('B___ASC___RH')
    asc_av = free_beta('B___ASC___AV')
    asc_drive = free_beta('B___ASC___Drive')

    # One Variable per column name, one Beta per (variable, alternative).
    variables = {}
    beta_dic = {}
    for key in att:
        beta_dic[key] = {}
        for var in variables_of(key):
            if var not in variables:
                variables[var] = bioexp.Variable(var)
            beta_name = 'B___' + var + '___' + key
            beta_dic[key][beta_name] = free_beta(beta_name)

    V = {
        key_choice_index['Walk']: asc_walk,
        key_choice_index['PT']: asc_pt,
        key_choice_index['RH']: asc_ridehail,
        key_choice_index['AV']: asc_av,
        key_choice_index['Drive']: asc_drive,
    }
    AV = {}
    for key in att:
        alternative = key_choice_index[key]
        AV[alternative] = bioexp.Variable(key + '_avail')
        for var in variables_of(key):
            beta_name = 'B___' + var + '___' + key
            V[alternative] += variables[var] * beta_dic[key][beta_name]

    formulas = {
        'loglike': bioexp.bioLogLogit(V, AV, bioexp.Variable('choice'))
    }
    biogeme = bio.BIOGEME(database, formulas, numberOfThreads=4)
    biogeme.modelName = "MNL_SGP"
    results = biogeme.estimate()

    # Discard the report files produced by the estimation.
    for report in ("MNL_SGP.html", "MNL_SGP.pickle"):
        os.remove(report)

    # Return a plain dict copy of the estimated values.
    return dict(results.getBetaValues().items())
Exemple #8
0
def predict_NL_2(betas, biogeme_file, data):
    """Simulate a nested logit model on ``data`` and measure its accuracy.

    ``data`` is modified in place and is the same object as the returned
    frame: it gains a ``<mode>_avail`` column (set to 1) per mode in
    ``modes_list``, one column per simulated formula, ``max_prob`` (row-wise
    maximum of those columns) and ``CHOOSE`` (index of the alternative whose
    value equals that maximum; on ties the last matching alternative in
    ``key_choice_index`` wins).

    :param betas: parameter values passed to the simulation.
    :param biogeme_file: mapping with the entries ``'V'``, ``'av'`` and
        ``'nests'`` describing the nested logit model.
    :param data: data frame with the explanatory variables and a ``choice``
        column.
    :return: tuple ``(acc, data_test)`` where ``acc`` is the share of rows
        with ``CHOOSE`` equal to ``choice``.
    :rtype: tuple
    """
    # Every mode is declared always available.
    for mode in modes_list:
        data[mode + '_avail'] = 1
    database = db.Database("NL_SGP", data)

    # The choice model is a nested logit: one formula per alternative.
    outputs = (
        ('prob_Walk', 'Walk'),
        ('prob_PT', 'PT'),
        ('prob_RH', 'RH'),
        ('prob_AV', 'AV'),
        ('prob_Drive', 'Drive'),
    )
    simulate = {}
    for column, alternative in outputs:
        simulate[column] = biomodels.nested(biogeme_file['V'],
                                            biogeme_file['av'],
                                            biogeme_file['nests'],
                                            key_choice_index[alternative])

    biogeme = bio.BIOGEME(database, simulate)

    # simulatedValues is a pandas data frame with the same number of rows as
    # the database, and as many columns as formulas to simulate.
    simulatedValues = biogeme.simulate(betas)

    prob_list = list(simulatedValues.columns)
    data_test = data  # alias, not a copy
    for column in prob_list:
        data_test[column] = 0
    data_test.loc[:, prob_list] = simulatedValues.loc[:, prob_list]
    data_test['max_prob'] = data_test[prob_list].max(axis=1)

    # Pick the alternative whose simulated value reaches the row maximum.
    data_test['CHOOSE'] = 0
    for mode in key_choice_index:
        is_best = data_test['prob_' + mode] == data_test['max_prob']
        data_test.loc[is_best, 'CHOOSE'] = key_choice_index[mode]

    is_correct = data_test['CHOOSE'] == data_test['choice']
    acc = len(data_test.loc[is_correct]) / len(data_test)

    return acc, data_test
def train_MNL(data):
    """Estimate the "MNL_Train" logit model on ``data`` and return its betas.

    ``data`` is modified in place: a ``<mode>_avail`` column set to 1 is
    added for every mode in ``modes_list``. The utility of each alternative
    in ``att`` is its alternative-specific constant plus one term per
    variable listed for it. The HTML and pickle files written by the
    estimation are deleted.

    :param data: data frame with the explanatory variables and a ``choice``
        column.
    :return: dict mapping each parameter name to its estimated value.
    :rtype: dict
    """
    # Every mode is declared always available.
    for mode in modes_list:
        data[mode + '_avail'] = 1
    database = db.Database("MNL_Train", data)

    def free_beta(name):
        # Parameter to be estimated, starting at 0, without bounds.
        return bioexp.Beta(name, 0, None, None, 0)

    asc_first = bioexp.Beta('B___ASC___choice1', 0, None, None, 1)  # fixed
    asc_second = free_beta('B___ASC___choice2')

    # One Variable per column name, one Beta per (variable, alternative).
    variables = {}
    beta_dic = {}
    for key in att:
        beta_dic[key] = {}
        for var in att[key]:
            if var not in variables:
                variables[var] = bioexp.Variable(var)
            beta_name = 'B___' + var + '___' + key
            beta_dic[key][beta_name] = free_beta(beta_name)

    V = {
        key_choice_index['choice1']: asc_first,
        key_choice_index['choice2']: asc_second,
    }
    AV = {}
    for key in att:
        alternative = key_choice_index[key]
        AV[alternative] = bioexp.Variable(key + '_avail')
        for var in att[key]:
            beta_name = 'B___' + var + '___' + key
            V[alternative] += variables[var] * beta_dic[key][beta_name]

    formulas = {
        'loglike': bioexp.bioLogLogit(V, AV, bioexp.Variable('choice'))
    }
    biogeme = bio.BIOGEME(database, formulas, numberOfThreads=4)
    biogeme.modelName = "MNL_Train"
    results = biogeme.estimate()

    # Discard the report files produced by the estimation.
    for report in ("MNL_Train.html", "MNL_Train.pickle"):
        os.remove(report)

    # Return a plain dict copy of the estimated values.
    return dict(results.getBetaValues().items())
Exemple #10
0
    def setUp(self):
        """Build the 21-row 'akiva' database used by the tests.

        The table has four columns: ``ID`` (1 to 21), ``AutoTime``,
        ``TransitTime`` and ``Choice`` (0 or 1). The resulting database is
        stored on ``self.database``.
        """
        observation_ids = pd.Series([i + 1 for i in range(21)])
        auto_times = pd.Series([
            52.9, 4.1, 4.1, 56.2, 51.8, 0.2, 27.6, 89.9, 41.5, 95.0, 99.1,
            18.5, 82.0, 8.6, 22.5, 51.4, 81.0, 51.0, 62.2, 95.1, 41.6
        ])
        transit_times = pd.Series([
            4.4, 28.5, 86.9, 31.6, 20.2, 91.2, 79.7, 2.2, 24.5, 43.5, 8.4,
            84.0, 38.0, 1.6, 74.1, 83.8, 19.2, 85.0, 90.1, 22.2, 91.5
        ])
        choices = pd.Series([
            1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0
        ])

        frame = pd.DataFrame({
            'ID': observation_ids,
            'AutoTime': auto_times,
            'TransitTime': transit_times,
            'Choice': choices,
        })

        self.database = db.Database('akiva', frame)
#     structBetas = structResults.getBetaValues()
#     coef [att] = {}
#     for var in Variable_name:
#         var_name = 'coef_' + var + '_' + att
#         coef[att][var_name] = structBetas[var_name]

# For every attitude, add a 'struc_equ_<attitude>' column to `data` holding
# the linear combination of the explanatory variables, each weighted by its
# coefficient coef[att]['coef_<var>_<att>'].
# NOTE(review): `attitude_name`, `Variable_name`, `coef` and `data` are
# defined outside this excerpt; `coef` is presumably filled from an earlier
# estimation (see the commented-out lines above) - confirm.
for att in attitude_name:
    struc_equ_name = 'struc_equ_' + att
    # Start from zero, then accumulate one term per variable.
    data[struc_equ_name] = 0
    for var in Variable_name:
        coef_name = 'coef_' + var + '_' + att
        data[struc_equ_name] += data[var] * coef[att][coef_name]  #scaled

#data.to_csv('data_processed.csv',index =False)

# Wrap the augmented data frame in a Biogeme database.
database = db.Database("stand_along", data)

from headers import *

# exclude = (Choice == -1.0)
# database.remove(exclude)

### Variables
# Income_4000_less = DefineVariable('Income_4000_less', (INCOME<=2) + (INCOME==12),database)
# Income_12000_more = DefineVariable('Income_12000_more', (INCOME>=7)*(INCOME!=12),database)
# age_60_more = DefineVariable('age_60_more',AGE >= Numeric(60),database)
# moreThanOneCar = DefineVariable('moreThanOneCar',AUTOOWN > 2,database)
# haveLicense = DefineVariable('haveLicense',LICENSE==1,database)
# male = DefineVariable('male',SEX == 4,database)
# highEducation = DefineVariable('highEducation', EDU >= 5,database) #more than university bechelor
# fulltimeJob = DefineVariable('fulltimeJob', JOB == 1,database) #more than university bechelor
def run_estimation_2015_2020():
    """Estimate a binary logit model of telecommuting on 2015 and 2020 data.

    A binary logit model on the possibility to work from home at least some
    times. The data are read from ``persons.csv`` (``;``-separated) in
    ``../data/output/data/estimation/2015_2020/``. The utility of
    telecommuting is multiplied by a scale that is 1 for 2015 and the
    estimated parameter ``scale_2020`` for 2020; the utility of not
    telecommuting is 0.

    The estimation runs with ``../data/output/models/estimation/2015_2020/``
    as working directory so that Biogeme writes its output files there; the
    previous working directory is restored afterwards, even if the
    estimation fails. The results are also written in LaTeX.

    The database variables are injected into the module globals, so the
    column names of ``persons.csv`` are used below as Python names.

    :author: Antonin Danalet, based on the example '01logit.py' by Michel Bierlaire, EPFL, on biogeme.epfl.ch
    """

    # Read the data
    data_file_directory = Path('../data/output/data/estimation/2015_2020/')
    # The separator is passed by keyword: pandas 2.x no longer accepts it as
    # a positional argument.
    df = pd.read_csv(data_file_directory / 'persons.csv', sep=';')
    database = db.Database('persons', df)

    # The following statement allows you to use the names of the variable as Python variable.
    globals().update(database.variables)

    # Parameters to be estimated
    # (the last argument of Beta is 0 for an estimated parameter and 1 for a
    # parameter kept fixed at its initial value)
    alternative_specific_constant = Beta('alternative_specific_constant', 0,
                                         None, None, 0)

    b_no_post_school_education = Beta('b_no_post_school_education', 0, None,
                                      None, 0)
    b_secondary_education = Beta('b_secondary_education', 0, None, None, 0)
    b_tertiary_education = Beta('b_tertiary_education', 0, None, None, 0)

    b_male_2020 = Beta('b_male_2020', 0, None, None, 1)

    b_single_household_2020 = Beta('b_single_household_2020', 0, None, None, 1)
    b_couple_without_children_2015 = Beta('b_couple_without_children_2015', 0,
                                          None, None, 0)
    b_couple_without_children_2020 = Beta('b_couple_without_children_2020', 0,
                                          None, None, 0)
    b_couple_with_children_2020 = Beta('b_couple_with_children_2020', 0, None,
                                       None, 1)
    b_single_parent_with_children_2020 = Beta(
        'b_single_parent_with_children_2020', 0, None, None, 1)
    b_not_family_household_2020 = Beta('b_not_family_household_2020', 0, None,
                                       None, 1)

    b_public_transport_connection_quality_abc_home_2020 = Beta(
        'b_public_transport_connection_quality_abc_home_2020', 0, None, None,
        1)
    b_public_transport_connection_quality_na_home_2015 = Beta(
        'b_public_transport_connection_quality_na_home_2015', 0, None, None, 0)
    b_public_transport_connection_quality_na_home_2020 = Beta(
        'b_public_transport_connection_quality_na_home_2020', 0, None, None, 1)

    b_public_transport_connection_quality_abcd_work_2020 = Beta(
        'b_public_transport_connection_quality_abcd_work_2020', 0, None, None,
        1)

    b_urban_home_2020 = Beta('b_urban_home_2020', 0, None, None, 1)
    b_rural_home_2020 = Beta('b_rural_home_2020', 0, None, None, 1)
    b_intermediate_home_2020 = Beta('b_intermediate_home_2020', 0, None, None,
                                    1)
    b_urban_work_2020 = Beta('b_urban_work_2020', 0, None, None, 1)
    b_rural_work_2020 = Beta('b_rural_work_2020', 0, None, None, 1)
    b_intermediate_work_2020 = Beta('b_intermediate_work_2020', 0, None, None,
                                    0)

    b_home_work_distance = Beta('b_home_work_distance', 0, None, None, 0)
    b_home_work_distance_zero = Beta('b_home_work_distance_zero', 0, None,
                                     None, 0)
    b_home_work_distance_na = Beta('b_home_work_distance_na', 0, None, None, 0)

    b_business_sector_agriculture_2020 = Beta(
        'b_business_sector_agriculture_2020', 0, None, None, 1)
    b_business_sector_production = Beta('b_business_sector_production', 0,
                                        None, None, 0)
    b_business_sector_wholesale = Beta('b_business_sector_wholesale', 0, None,
                                       None, 0)
    b_business_sector_retail = Beta('b_business_sector_retail', 0, None, None,
                                    0)
    b_business_sector_gastronomy = Beta('b_business_sector_gastronomy', 0,
                                        None, None, 0)
    b_business_sector_finance = Beta('b_business_sector_finance', 0, None,
                                     None, 0)
    b_business_sector_services_fc_2020 = Beta(
        'b_business_sector_services_fc_2020', 0, None, None, 1)
    b_business_sector_other_services = Beta('b_business_sector_other_services',
                                            0, None, None, 0)
    b_business_sector_others = Beta('b_business_sector_others', 0, None, None,
                                    0)
    b_business_sector_non_movers = Beta('b_business_sector_non_movers', 0,
                                        None, None, 0)
    b_executives = Beta('b_executives', 0, None, None, 0)
    b_german = Beta('b_german', 0, None, None, 0)
    b_hh_income_na = Beta('b_hh_income_na', 0, None, None, 0)
    b_hh_income_8000_or_less = Beta('b_hh_income_8000_or_less', 0, None, None,
                                    0)

    b_owning_a_general_abo = Beta('b_owning_a_general_abo', 0, None, None, 0)
    b_regional_abo_2020 = Beta('b_regional_abo_2020', 0, None, None, 1)
    b_regional_abo_na_2020 = Beta('b_regional_abo_na_2020', 0, None, None, 1)
    b_half_fare_abo_2020 = Beta('b_half_fare_abo_2020', 0, None, None, 1)
    b_half_fare_abo_na_2020 = Beta('b_half_fare_abo_na_2020', 0, None, None, 1)
    b_car_avail_2020 = Beta('b_car_avail_2020', 0, None, None, 1)
    b_car_avail_na_2020 = Beta('b_car_avail_na_2020', 0, None, None, 1)

    b_mobility_resource_na = Beta('b_mobility_resource_na', 0, None, None, 0)
    b_mobility_resource_car_general_abo_2020 = Beta(
        'b_mobility_resource_car_general_abo_2020', 0, None, None, 1)
    b_mobility_resource_car_half_fare_abo = Beta(
        'b_mobility_resource_car_half_fare_abo', 0, None, None, 0)
    b_mobility_resource_car_2020 = Beta('b_mobility_resource_car_2020', 0,
                                        None, None, 1)
    # NOTE(review): the parameter name passed to Beta ('..._general_no_car_abo_2020')
    # differs from the Python name ('..._general_abo_no_car_2020') - confirm
    # which spelling is intended before renaming, since it appears in the output.
    b_mobility_resource_general_abo_no_car_2020 = Beta(
        'b_mobility_resource_general_no_car_abo_2020', 0, None, None, 0)
    b_mobility_resource_half_fare_abo_2020 = Beta(
        'b_mobility_resource_half_fare_abo_2020', 0, None, None, 1)
    b_mobility_resource_none_2020 = Beta('b_mobility_resource_none_2020', 0,
                                         None, None, 1)
    b_mobility_resource_car_half_fare_regional_abo_2020 = Beta(
        'b_mobility_resource_car_half_fare_regional_abo_2020', 0, None, None,
        1)
    b_mobility_resource_car_regional_abo_2020 = Beta(
        'b_mobility_resource_car_regional_abo_2020', 0, None, None, 1)
    b_mobility_resource_half_fare_regional_abo_2020 = Beta(
        'b_mobility_resource_half_fare_regional_abo_2020', 0, None, None, 1)
    b_mobility_resource_regional_abo_2020 = Beta(
        'b_mobility_resource_regional_abo_2020', 0, None, None, 1)

    scale_2020 = Beta('scale_2020', 1, 0.001, None, 0)

    # Definition of new variables
    # NOTE(review): this variable is registered as 'male', not 'male_2020' -
    # confirm this is intended.
    male_2020 = DefineVariable('male', (sex == 1) * (year == 2020), database)

    single_household_2020 = DefineVariable('single_household_2020',
                                           (hh_type == 10) * (year == 2020),
                                           database)
    couple_without_children_2015 = DefineVariable(
        'couple_without_children_2015', (hh_type == 210) * (year == 2015),
        database)
    couple_without_children_2020 = DefineVariable(
        'couple_without_children_2020', (hh_type == 210) * (year == 2020),
        database)
    couple_with_children_2020 = DefineVariable(
        'couple_with_children_2020', (hh_type == 220) * (year == 2020),
        database)
    single_parent_with_children_2020 = DefineVariable(
        'single_parent_with_children_2020', (hh_type == 230) * (year == 2020),
        database)
    not_family_household_2020 = DefineVariable(
        'not_family_household_2020', (hh_type == 30) * (year == 2020),
        database)

    public_transport_connection_quality_abc_home_2020 = \
        DefineVariable('public_transport_connection_quality_abc_home_2020',
                       ((public_transport_connection_quality_ARE_home == 1) +
                        (public_transport_connection_quality_ARE_home == 2) +
                        (public_transport_connection_quality_ARE_home == 3)) * (year == 2020), database)
    public_transport_connection_quality_na_home_2015 = \
        DefineVariable('public_transport_connection_quality_NA_home_2015',
                       (public_transport_connection_quality_ARE_home == 5) * (year == 2015), database)
    public_transport_connection_quality_na_home_2020 = \
        DefineVariable('public_transport_connection_quality_NA_home_2020',
                       (public_transport_connection_quality_ARE_home == 5) * (year == 2020), database)

    # NOTE(review): registered as '..._abc_work_2020' although it covers the
    # four categories 1 to 4 ("abcd") - confirm the intended name.
    public_transport_connection_quality_abcd_work_2020 = \
        DefineVariable('public_transport_connection_quality_abc_work_2020',
                       ((public_transport_connection_quality_ARE_work == 1) +
                        (public_transport_connection_quality_ARE_work == 2) +
                        (public_transport_connection_quality_ARE_work == 3) +
                        (public_transport_connection_quality_ARE_work == 4)) * (year == 2020), database)

    urban_home_2020 = DefineVariable(
        'urban_home_2020', (urban_typology_home == 1) * (year == 2020),
        database)
    rural_home_2020 = DefineVariable(
        'rural_home_2020', (urban_typology_home == 3) * (year == 2020),
        database)
    intermediate_home_2020 = DefineVariable(
        'intermediate_home_2020', (urban_typology_home == 2) * (year == 2020),
        database)
    urban_work_2020 = DefineVariable(
        'urban_work_2020', (urban_typology_work == 1) * (year == 2020),
        database)
    rural_work_2020 = DefineVariable(
        'rural_work_2020', (urban_typology_work == 3) * (year == 2020),
        database)
    intermediate_work_2020 = DefineVariable(
        'intermediate_work_2020', (urban_typology_work == 2) * (year == 2020),
        database)

    # Distance is rescaled and set to 0 for negative (missing) values.
    home_work_distance = DefineVariable(
        'home_work_distance',
        home_work_crow_fly_distance * (home_work_crow_fly_distance >= 0.0) /
        100000.0, database)
    home_work_distance_zero = DefineVariable(
        'home_work_distance_zero', home_work_crow_fly_distance == 0.0,
        database)
    home_work_distance_na = DefineVariable('home_work_distance_na',
                                           home_work_crow_fly_distance == -999,
                                           database)

    executives = DefineVariable('executives', work_position == 1, database)

    german = DefineVariable('german', language == 1, database)

    hh_income_na = DefineVariable('hh_income_na', hh_income < 0, database)
    hh_income_8000_or_less = DefineVariable(
        'hh_income_8000_or_less', (hh_income == 1) + (hh_income == 2) +
        (hh_income == 3) + (hh_income == 4), database)

    owning_a_general_abo = DefineVariable('owning_a_general_abo',
                                          GA_ticket == 1, database)
    regional_abo_2020 = DefineVariable('regional_abo_2020',
                                       (Verbund_Abo == 1) * (year == 2020),
                                       database)
    half_fare_abo_2020 = DefineVariable('half_fare_abo_2020',
                                        (halbtax_ticket == 1) * (year == 2020),
                                        database)
    car_avail_always_or_on_demand_2020 = DefineVariable(
        'car_avail_always_or_on_demand_2020',
        ((car_avail == 1) + (car_avail == 2)) * (year == 2020), database)
    regional_abo_na_2020 = DefineVariable('regional_abo_na_2020',
                                          (Verbund_Abo < 0) * (year == 2020),
                                          database)
    half_fare_abo_na_2020 = DefineVariable(
        'half_fare_abo_na_2020', (halbtax_ticket < 0) * (year == 2020),
        database)
    car_avail_na_2020 = DefineVariable('car_avail_na_2020',
                                       (car_avail < 0) * (year == 2020),
                                       database)

    mobility_resource_na = DefineVariable('mobility_resource_na',
                                          mobility_resources == -98, database)
    mobility_resource_car_general_abo_2020 = DefineVariable(
        'mobility_resource_car_general_abo_2020',
        (mobility_resources == 1) * (year == 2020), database)
    mobility_resource_car_half_fare_abo = DefineVariable(
        'mobility_resource_car_half_fare_abo', mobility_resources == 2,
        database)
    mobility_resource_car_2020 = DefineVariable('mobility_resource_car_2020',
                                                (mobility_resources == 3) *
                                                (year == 2020), database)
    mobility_resource_general_abo_no_car_2020 = DefineVariable(
        'mobility_resource_general_abo_no_car_2020',
        (mobility_resources == 4) * (year == 2020), database)
    mobility_resource_half_fare_abo_2020 = DefineVariable(
        'mobility_resource_half_fare_abo_2020',
        (mobility_resources == 5) * (year == 2020), database)
    mobility_resource_none_2020 = DefineVariable('mobility_resource_none_2020',
                                                 (mobility_resources == 6) *
                                                 (year == 2020), database)
    mobility_resource_car_half_fare_regional_abo_2020 = \
        DefineVariable('mobility_resource_car_half_fare_regional_abo_2020',
                       (mobility_resources == 20) * (year == 2020), database)
    mobility_resource_car_regional_abo_2020 = DefineVariable(
        'mobility_resource_car_regional_abo_2020',
        (mobility_resources == 30) * (year == 2020), database)
    mobility_resource_half_fare_regional_abo_2020 = DefineVariable(
        'mobility_resource_half_fare_regional_abo_2020',
        (mobility_resources == 50) * (year == 2020), database)
    mobility_resource_regional_abo_2020 = DefineVariable(
        'mobility_resource_regional_abo_2020',
        (mobility_resources == 60) * (year == 2020), database)

    business_sector_agriculture_2020 = DefineVariable(
        'business_sector_agriculture_2020',
        business_sector_agriculture * (year == 2020), database)
    business_sector_services_fc_2020 = DefineVariable(
        'business_sector_services_fc_2020',
        business_sector_services_fc * (year == 2020), database)

    #  Utility
    U = alternative_specific_constant + \
        b_executives * executives + \
        b_no_post_school_education * no_post_school_educ + \
        b_secondary_education * secondary_education + \
        b_tertiary_education * tertiary_education + \
        b_couple_without_children_2015 * couple_without_children_2015 + \
        b_couple_without_children_2020 * couple_without_children_2020 + \
        b_public_transport_connection_quality_na_home_2015 * public_transport_connection_quality_na_home_2015 + \
        b_public_transport_connection_quality_na_home_2020 * public_transport_connection_quality_na_home_2020 + \
        b_home_work_distance * home_work_distance + \
        b_home_work_distance_zero * home_work_distance_zero + \
        b_home_work_distance_na * home_work_distance_na + \
        models.piecewiseFormula(age, [15, 19, 31, 79, 85]) + \
        b_business_sector_retail * business_sector_retail + \
        b_business_sector_gastronomy * business_sector_gastronomy + \
        b_business_sector_finance * business_sector_finance + \
        b_business_sector_production * business_sector_production + \
        b_business_sector_wholesale * business_sector_wholesale + \
        b_business_sector_other_services * business_sector_other_services + \
        b_business_sector_others * business_sector_others + \
        b_business_sector_non_movers * business_sector_non_movers + \
        b_german * german + \
        models.piecewiseFormula(work_percentage, [0, 90, 101]) + \
        b_hh_income_na * hh_income_na + \
        b_hh_income_8000_or_less * hh_income_8000_or_less + \
        b_owning_a_general_abo * owning_a_general_abo + \
        b_mobility_resource_na * mobility_resource_na + \
        b_mobility_resource_car_half_fare_abo * mobility_resource_car_half_fare_abo + \
        b_male_2020 * male_2020 + \
        b_single_household_2020 * single_household_2020 + \
        b_couple_with_children_2020 * couple_with_children_2020 + \
        b_single_parent_with_children_2020 * single_parent_with_children_2020 + \
        b_not_family_household_2020 * not_family_household_2020 + \
        b_public_transport_connection_quality_abc_home_2020 * public_transport_connection_quality_abc_home_2020 + \
        b_public_transport_connection_quality_abcd_work_2020 * public_transport_connection_quality_abcd_work_2020 + \
        b_urban_home_2020 * urban_home_2020 + \
        b_rural_home_2020 * rural_home_2020 + \
        b_intermediate_home_2020 * intermediate_home_2020 + \
        b_urban_work_2020 * urban_work_2020 + \
        b_rural_work_2020 * rural_work_2020 + \
        b_intermediate_work_2020 * intermediate_work_2020 + \
        b_business_sector_agriculture_2020 * business_sector_agriculture_2020 + \
        b_business_sector_services_fc_2020 * business_sector_services_fc_2020 + \
        b_regional_abo_2020 * regional_abo_2020 + \
        b_regional_abo_na_2020 * regional_abo_na_2020 + \
        b_half_fare_abo_2020 * half_fare_abo_2020 + \
        b_half_fare_abo_na_2020 * half_fare_abo_na_2020 + \
        b_car_avail_2020 * car_avail_always_or_on_demand_2020 + \
        b_car_avail_na_2020 * car_avail_na_2020 + \
        b_mobility_resource_car_general_abo_2020 * mobility_resource_car_general_abo_2020 + \
        b_mobility_resource_car_2020 * mobility_resource_car_2020 + \
        b_mobility_resource_general_abo_no_car_2020 * mobility_resource_general_abo_no_car_2020 + \
        b_mobility_resource_half_fare_abo_2020 * mobility_resource_half_fare_abo_2020 + \
        b_mobility_resource_none_2020 * mobility_resource_none_2020 + \
        b_mobility_resource_car_half_fare_regional_abo_2020 * mobility_resource_car_half_fare_regional_abo_2020 + \
        b_mobility_resource_car_regional_abo_2020 * mobility_resource_car_regional_abo_2020 + \
        b_mobility_resource_half_fare_regional_abo_2020 * mobility_resource_half_fare_regional_abo_2020 + \
        b_mobility_resource_regional_abo_2020 * mobility_resource_regional_abo_2020
    U_no_telecommuting = 0

    # Scale associated with 2020 is estimated
    scale = (year == 2015) + (year == 2020) * scale_2020

    # Associate utility functions with the numbering of alternatives
    V = {
        1: scale * U,  # Yes or sometimes
        0: U_no_telecommuting
    }  # No

    av = {1: 1, 0: 1}

    # Definition of the model. This is the contribution of each
    # observation to the log likelihood function.
    logprob = models.loglogit(
        V,
        av,  # All alternatives are supposed to be always available
        telecommuting)  # Choice variable

    # Change the working directory, so that biogeme writes in the correct folder
    standard_directory = os.getcwd()
    output_directory = '../data/output/models/estimation/2015_2020/'
    os.chdir(output_directory)
    try:
        # Create the Biogeme object
        biogeme = bio.BIOGEME(database, logprob)
        output_file_name = 'logit_telecommuting_2015_2020'
        biogeme.modelName = output_file_name

        # Estimate the parameters
        results = biogeme.estimate()

        # Get the results in LaTeX
        results.writeLaTeX()
    finally:
        # Go back to the normal working directory, even if the estimation
        # failed, so the caller's relative paths keep working.
        os.chdir(standard_directory)
Exemple #13
0
import pandas as pd
import biogeme.version as ver
import biogeme.biogeme as bio
import biogeme.database as db
import biogeme.messaging as msg
import biogeme.expressions as be

# Read the tab-separated data. The separator is passed by keyword because
# recent pandas releases accept only the file path as a positional argument.
df = pd.read_csv("swissmetro.dat", sep='\t')
database = db.Database("swissmetro", df)

# Removing some observations can be done directly using pandas: drop the
# rows where PURPOSE is neither 1 nor 3, and the rows where CHOICE is 0.
remove = (
    ((database.data.PURPOSE != 1) & (database.data.PURPOSE != 3))
    | (database.data.CHOICE == 0)
)
database.data.drop(database.data[remove].index, inplace=True)

# Parameters to be estimated
ASC_CAR = be.Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = be.Beta('ASC_TRAIN', 0, None, None, 0)
ASC_SM = be.Beta('ASC_SM', 0, None, None, 1)
B_TIME = be.Beta('B_TIME', 0, None, None, 0)
B_COST = be.Beta('B_COST', 0, None, None, 0)

# Definition of new variables: the cost is multiplied by the indicator
# (GA == 0), so it is zero whenever GA is not 0.
SM_COST = be.Variable('SM_CO') * (be.Variable('GA') == 0)
TRAIN_COST = be.Variable('TRAIN_CO') * (be.Variable('GA') == 0)

# Definition of new variables: adding columns to the database
CAR_AV_SP = be.DefineVariable(
    'CAR_AV_SP',
    be.Variable('CAR_AV') * (be.Variable('SP') != 0),
    database,
)
TRAIN_AV_SP = be.DefineVariable(
    'TRAIN_AV_SP',
    be.Variable('TRAIN_AV') * (be.Variable('SP') != 0),
    database,
)
TRAIN_TT_SCALED = be.DefineVariable(
    'TRAIN_TT_SCALED', be.Variable('TRAIN_TT') / 100.0, database
)
TRAIN_COST_SCALED = be.DefineVariable(
    'TRAIN_COST_SCALED', TRAIN_COST / 100, database
)
SM_TT_SCALED = be.DefineVariable(
    'SM_TT_SCALED', be.Variable('SM_TT') / 100.0, database
)
Exemple #14
0
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.loglikelihood as ll
import biogeme.models as models
from biogeme.expressions import Beta, DefineVariable, Elem, bioMultSum, Numeric
import unittest

# Load the tab-separated data file and wrap it in a Biogeme database.
pandas = pd.read_csv("optima.dat", sep='\t')
database = db.Database("optima", pandas)

# Make the variables of the database available as module-level names.
globals().update(database.variables)

# Remove the observations where Choice equals -1.
exclude = Choice == -1.0
database.remove(exclude)

# Piecewise linear definition of income: the income is divided by 1000
# and the break points are 4, 6, 8 and 10.
ScaledIncome = DefineVariable(
    'ScaledIncome',
    CalculatedIncome / 1000,
    database,
)
formulaIncome = models.piecewiseFormula(
    ScaledIncome,
    [None, 4, 6, 8, 10, None],
)

# Indicator variables defined through DefineVariable.
age_65_more = DefineVariable(
    'age_65_more', age >= Numeric(65), database
)
moreThanOneCar = DefineVariable(
    'moreThanOneCar', NbCar > 1, database
)
moreThanOneBike = DefineVariable(
    'moreThanOneBike', NbBicy > 1, database
)
individualHouse = DefineVariable(
    'individualHouse', HouseType == 1, database
)
Exemple #15
0
    def setUp(self):
        """Build the databases and the model specification used by the tests.

        Three ``db.Database`` objects are created from ``swissmetro.dat``:

          - ``self.database``, with a user-defined ``TRIANGULAR`` random
            number generator,
          - ``self.paneldatabase``, declared as panel data on ``ID``,
          - ``self.binarydatabase``, from which additional observations
            are excluded.

        The specification ``01logit`` is stored in ``self.models`` as a
        tuple ``(database, log likelihood expression, -5331.252)``; the
        last item is presumably the expected final log likelihood
        (TODO confirm against the test that consumes it).
        """
        # The original assigned a throw-away local named ``longMessage``,
        # which had no effect; the unittest option is an attribute of the
        # test case itself.
        self.longMessage = True
        self.formulas = {}
        self.models = {}

        # Each database is built from its own freshly read DataFrame,
        # presumably so that removing observations from one of them does
        # not affect the others.
        pandas = pd.read_table("swissmetro.dat")
        self.database = db.Database("swissmetro", pandas)

        def theTriangularGenerator(size):
            """Return ``size`` draws from a triangular(-1, 0, 1) law."""
            return np.random.triangular(-1, 0, 1, size=size)

        myRandomNumberGenerators = {'TRIANGULAR': theTriangularGenerator}
        self.database.setRandomNumberGenerators(myRandomNumberGenerators)

        pandas = pd.read_table("swissmetro.dat")
        self.paneldatabase = db.Database("swissmetro", pandas)
        self.paneldatabase.panel("ID")

        pandas = pd.read_table("swissmetro.dat")
        self.binarydatabase = db.Database("swissmetro", pandas)

        # Exclude some observations: PURPOSE neither 1 nor 3, or CHOICE == 0.
        exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
        self.database.remove(exclude)
        self.paneldatabase.remove(exclude)

        CAR_AV_SP = CAR_AV * (SP != 0)
        TRAIN_AV_SP = TRAIN_AV * (SP != 0)

        # For the binary database, also exclude the observations where
        # train or car is unavailable, or where alternative 2 is chosen.
        excludebinary = (
            (TRAIN_AV_SP == 0)
            + (CAR_AV_SP == 0)
            + (CHOICE == 2)
            + ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0))
        ) > 0
        self.binarydatabase.remove(excludebinary)

        # Generic definitions
        ASC_CAR = Beta('ASC_CAR', 1, None, None, 0)
        ASC_TRAIN = Beta('ASC_TRAIN', 1, None, None, 0)
        ASC_SM = Beta('ASC_SM', 1, None, None, 1)
        B_TIME = Beta('B_TIME', 1, None, None, 0)
        B_COST = Beta('B_COST', 1, None, None, 0)

        # Costs are multiplied by the indicator (GA == 0).
        SM_COST = SM_CO * (GA == 0)
        TRAIN_COST = TRAIN_CO * (GA == 0)

        TRAIN_TT_SCALED = TRAIN_TT / 100.0
        TRAIN_COST_SCALED = TRAIN_COST / 100
        SM_TT_SCALED = SM_TT / 100.0
        SM_COST_SCALED = SM_COST / 100
        CAR_TT_SCALED = CAR_TT / 100
        CAR_CO_SCALED = CAR_CO / 100

        # Availability of each alternative
        av = {1: TRAIN_AV_SP,
              2: SM_AV,
              3: CAR_AV_SP}

        V = {}
        loglike = {}

        # 01logit
        V["01logit"] = {
            1: ASC_TRAIN
            + B_TIME * TRAIN_TT_SCALED
            + B_COST * TRAIN_COST_SCALED,
            2: ASC_SM
            + B_TIME * SM_TT_SCALED
            + B_COST * SM_COST_SCALED,
            3: ASC_CAR
            + B_TIME * CAR_TT_SCALED
            + B_COST * CAR_CO_SCALED,
        }

        loglike["01logit"] = bioLogLogit(V["01logit"], av, CHOICE)
        self.models["01logit"] = (
            self.database,
            loglike["01logit"],
            -5331.252,
        )
Exemple #16
0
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import unittest
from biogeme.expressions import Beta, DefineVariable, log, bioDraws, MonteCarlo, PanelLikelihoodTrajectory

# Read the tab-separated data and wrap it in a Biogeme database.
pandas = pd.read_csv("swissmetro.dat", sep='\t')
database = db.Database("swissmetro", pandas)

# Declare the data as panel data, using the column "ID".
database.panel("ID")

# The Pandas data structure is available as database.data. Use all the
# Pandas functions to investigate the database
#print(database.data.describe())

# Make the variables of the database available as module-level names.
globals().update(database.variables)

# Removing some observations can be done directly using pandas.
#remove = (((database.data.PURPOSE != 1) & (database.data.PURPOSE != 3)) | (database.data.CHOICE == 0))
#database.data.drop(database.data[remove].index,inplace=True)

# Here we use the "biogeme" way for backward compatibility: the product
# acts as a logical "and" and the sum as a logical "or", so an observation
# is excluded when PURPOSE is neither 1 nor 3, or when CHOICE is 0.
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
database.remove(exclude)

# Parameters of the model. ASC_SM is the only one declared with a last
# argument of 1 instead of 0 (presumably: held fixed at its initial value
# as the normalisation -- TODO confirm).
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
ASC_SM = Beta('ASC_SM', 0, None, None, 1)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_COST = Beta('B_COST', 0, None, None, 0)
"""

import sys
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.results as res
import biogeme.messaging as msg
from biogeme.expressions import Beta, DefineVariable, bioDraws, \
    MonteCarlo, Elem, bioNormalCdf, exp, log

# Read the data
df = pd.read_csv('optima.dat', sep='\t')
database = db.Database('optima', df)

# The following statement allows you to use the names of the variable
# as Python variable.
globals().update(database.variables)

# Exclude observations such that the chosen alternative is -1
database.remove(Choice == -1.0)

# Read the estimates from the previous estimation, and use
# them as starting values
try:
    results = res.bioResults(pickleFile='05latentChoiceFull.pickle')
except FileNotFoundError:
    print(
        'Run first the script 05latentChoiceFull.py in order to generate the file '
"""

# pylint: disable=invalid-name, undefined-variable

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.draws as draws
from biogeme.expressions import exp, bioDraws, MonteCarlo

# Storing the draws requires a database, so a dummy one holding a single
# entry is built.
pandas = pd.DataFrame({'FakeColumn': [1.0]})
database = db.Database('fakeDatabase', pandas)


def halton13(sampleSize, numberOfDraws):
    """
    Example of user-defined draws: Halton draws with base 13, where the
    first 10 draws are skipped.

    :param sampleSize: first argument forwarded to ``draws.getHaltonDraws``.
    :param numberOfDraws: second argument forwarded to
        ``draws.getHaltonDraws``.
    :return: the value returned by ``draws.getHaltonDraws``.
    """
    haltonSettings = {'base': 13, 'skip': 10}
    return draws.getHaltonDraws(sampleSize, numberOfDraws, **haltonSettings)


# Register the user-defined generator under the name 'HALTON13', together
# with a textual description, in the database.
mydraws = {'HALTON13': (halton13, 'Halton draws, base 13, skipping 10')}
database.setRandomNumberGenerators(mydraws)

# Expression to integrate: the exponential of draws named 'U' of type
# 'UNIFORM', wrapped in the MonteCarlo operator.
integrand = exp(bioDraws('U', 'UNIFORM'))
simulatedI = MonteCarlo(integrand)
Exemple #19
0
data_folder = "../data/"

# The number of draws and of cores are taken from the command line when
# exactly two arguments are given; otherwise default values are used.
if len(sys.argv) != 3:
    n_draws, n_cores = 500, 2
else:
    n_draws, n_cores = map(int, sys.argv[1:3])

df = pd.read_csv(f"{data_folder}artificial_wide.csv")

# The choice column is converted to strings, then (after the availability
# columns are added) mapped to the integers 1, 2 and 3.
df['choice'] = df['choice'].astype('str')
mapping = {'1': 1, '2': 2, '3': 3}

# One availability column per alternative, filled with ones.
for k, v in mapping.items():
    df[f"aval_{k}"] = np.ones(len(df))
start_time = time()
df = df.replace({'choice': mapping})
database = db.Database('artificial', df)

# Make the variables of the database available as module-level names.
globals().update(database.variables)

# Parameters declared under the "fixed params" heading
b_price = Beta('b_price', 0, None, None, 0)
b_time = Beta('b_time', 0, None, None, 0)
b_conven = Beta('b_conven', 0, None, None, 0)
b_comfort = Beta('b_comfort', 0, None, None, 0)
b_nonsig1 = Beta('b_nonsig1', 0, None, None, 0)
b_nonsig2 = Beta('b_nonsig2', 0, None, None, 0)
b_nonsig3 = Beta('b_nonsig3', 0, None, None, 0)

# Parameters declared under the "random params" heading
u_meals = Beta('u_meals', 0, None, None, 0)
u_petfr = Beta('u_petfr', 0, None, None, 0)
Exemple #20
0
    def estimate_model(self, pandas_df_for_specified_country, country):
        '''
        Estimate a three-alternative logit mode choice model for one country.

        :param pandas_df_for_specified_country: observations used for the
            estimation. One indicator column ``OCC_1`` ... ``OCC_6`` per value
            of ``user_occupation`` is added to this DataFrame in place.
        :param country: country code selecting the specification: 'GR' or
            'ES', 'NL', or 'PT'.
        :return: The estimated model, in a variable with 3 attributes: betas, structure, results.
        '''
        # NOTE: no copy is made, so the OCC_* columns below are added to the
        # DataFrame object passed in by the caller.
        mypanda = pandas_df_for_specified_country
        # One 0/1 indicator column per occupation code 1..6
        for i in range(1, 7):
            mypanda['OCC_' + str(i)] = np.where(pandas_df_for_specified_country['user_occupation'] == i, 1, 0)

        # create the respective database (needed for biogeme)
        estimationdb = db.Database('estimationdb', mypanda)

        print('Training Mode Choice model for', country)

        # Alternative Specific Constants
        ASC_CAR = Beta('ASC_CAR', 0, None, None, 1)  # This ASC remains equal to zero
        ASC_PT = Beta('ASC_PT', 0, None, None, 0)
        ASC_MOT = Beta('ASC_MOT', 0, None, None, 0)
        ASC_BIKE = Beta('ASC_BIKE', 0, None, None, 0)

        # Beta variables (i.e. coefficients) - alternative specific
        BETA_TIME = Beta('BETA_TIME', 0, None, None, 0)  # Travel Time
        BETA_COST = Beta('BETA_COST', 0, None, None, 0)  # Travel Cost
        BETA_S = Beta('BETA_S', 0, None, None, 0)  # Comfort

        # Beta variables (i.e. coefficients) - traveller
        # Coefficients entering the public transport (PT) utility
        BETA_AGE_PT = Beta('BETA_AGE_PT', 0, None, None, 0)  # Age
        BETA_NCAR_PT = Beta('BETA_NCAR_PT', 0, None, None, 0)  # Number of trips by car
        BETA_NPT_PT = Beta('BETA_NPT_PT', 0, None, None, 0)  # Number of trips by pt
        BETA_GENDER_PT = Beta('BETA_GENDER_PT', 0, None, None, 0)  # Gender
        BETA_SCOPE_PT = Beta('BETA_SCOPE_PT', 0, None, None, 0)  # Trip Purpose
        BETA_OCC_1_PT = Beta('BETA_OCC_1_PT', 0, None, None, 0)  # 1:Private employee
        BETA_OCC_2_PT = Beta('BETA_OCC_2_PT', 0, None, None, 0)  # 2:Public servant
        BETA_OCC_3_PT = Beta('BETA_OCC_3_PT', 0, None, None, 0)  # 3:Self-employed
        BETA_OCC_5_PT = Beta('BETA_OCC_5_PT', 0, None, None, 0)  # 5:Retired
        BETA_OCC_6_PT = Beta('BETA_OCC_6_PT', 0, None, None, 0)  # 6:Unemployed

        # Coefficients entering the bike utility
        BETA_AGE_BIKE = Beta('BETA_AGE_BIKE', 0, None, None, 0)  # Age
        BETA_NCAR_BIKE = Beta('BETA_NCAR_BIKE', 0, None, None, 0)  # Number of trips by car
        BETA_NPT_BIKE = Beta('BETA_NPT_BIKE', 0, None, None, 0)  # Number of trips by pt
        BETA_OCC_1_BIKE = Beta('BETA_OCC_1_BIKE', 0, None, None, 0)  # 1:Private employee
        BETA_OCC_3_BIKE = Beta('BETA_OCC_3_BIKE', 0, None, None, 0)  # 3:Self-employed
        BETA_OCC_4_BIKE = Beta('BETA_OCC_4_BIKE', 0, None, None, 0)  # 4:Student
        BETA_OCC_5_BIKE = Beta('BETA_OCC_5_BIKE', 0, None, None, 0)  # 5:Retired
        BETA_OCC_6_BIKE = Beta('BETA_OCC_6_BIKE', 0, None, None, 0)  # 6:Unemployed

        # Coefficients entering the moto utility
        BETA_AGE_MOT = Beta('BETA_AGE_MOT', 0, None, None, 0)  # Age
        BETA_GENDER_MOT = Beta('BETA_GENDER_MOT', 0, None, None, 0)  # Gender
        BETA_SCOPE_MOT = Beta('BETA_SCOPE_MOT', 0, None, None, 0)  # Scope
        BETA_NCAR_MOT = Beta('BETA_NCAR_MOT', 0, None, None, 0)  # Number of trips by car
        BETA_NPT_MOT = Beta('BETA_NPT_MOT', 0, None, None, 0)  # Number of trips by pt
        BETA_OCC_2_MOT = Beta('BETA_OCC_2_MOT', 0, None, None, 0)  # 2:Public servant
        BETA_OCC_3_MOT = Beta('BETA_OCC_3_MOT', 0, None, None, 0)  # 3:Self-employed
        BETA_OCC_5_MOT = Beta('BETA_OCC_5_MOT', 0, None, None, 0)  # 5:Retired
        BETA_OCC_6_MOT = Beta('BETA_OCC_6_MOT', 0, None, None, 0)  # 6:Unemployed

        # Variables of the data set used in the utility functions
        trip_comfort_car = Variable('trip_comfort_car')
        trip_comfort_moto = Variable('trip_comfort_moto')
        trip_comfort_bike = Variable('trip_comfort_moto')  # in the training dataset, both moto and bike are under the moto variable
        trip_comfort_pt = Variable('trip_comfort_pt')
        trip_cost_car = Variable('trip_cost_car')
        trip_cost_moto = Variable('trip_cost_moto')
        trip_cost_bike = Variable('trip_cost_moto')  # in the training dataset, both moto and bike are under the moto variable
        trip_cost_pt = Variable('trip_cost_pt')
        trip_dur_car = Variable('trip_dur_car')
        trip_dur_moto = Variable('trip_dur_moto')
        trip_dur_bike = Variable('trip_dur_moto')  # in the training dataset, both moto and bike are under the moto variable
        trip_dur_pt = Variable('trip_dur_pt')
        trip_purpose = Variable('trip_purpose')
        AGE = Variable('AGE')
        user_gender = Variable('user_gender')
        user_trips_car = Variable('user_trips_car')
        user_trips_pt = Variable('user_trips_pt')
        OCC_1 = Variable('OCC_1')  # 1:Private employee
        OCC_2 = Variable('OCC_2')  # 2:Public servant
        OCC_3 = Variable('OCC_3')  # 3:Self-employed
        OCC_4 = Variable('OCC_4')  # 4:Student
        OCC_5 = Variable('OCC_5')  # 5:Retired
        OCC_6 = Variable('OCC_6')  # 6:Unemployed
        user_choice = Variable('user_choice')
        user_car_avail = Variable('user_car_avail')
        user_moto_avail = Variable('user_moto_avail')
        user_bike_avail = Variable('user_bike_avail')

        # Country-specific specification. In every branch the alternatives
        # are numbered 1 (car), 2 (public transport) and 3 (moto or bike).
        if country == 'GR' or country == 'ES':  # FIXME create a separate model for ES
            ### Definition of utility functions - one for each alternative:
            V_CAR = ASC_CAR + \
                BETA_TIME * trip_dur_car + \
                BETA_S * trip_comfort_car

            V_PT = ASC_PT + \
                BETA_TIME * trip_dur_pt+ \
                BETA_S * trip_comfort_pt + \
                BETA_SCOPE_PT * trip_purpose + \
                BETA_AGE_PT * AGE + \
                BETA_GENDER_PT * user_gender + \
                BETA_NCAR_PT * user_trips_car + \
                BETA_NPT_PT * user_trips_pt + \
                BETA_OCC_2_PT * OCC_2 + \
                BETA_OCC_5_PT * OCC_5

            V_MOT = ASC_MOT + \
                BETA_TIME * trip_dur_moto + \
                BETA_S * trip_comfort_moto + \
                BETA_SCOPE_MOT * trip_purpose + \
                BETA_AGE_MOT * AGE + \
                BETA_GENDER_MOT * user_gender + \
                BETA_NCAR_MOT * user_trips_car + \
                BETA_NPT_MOT * user_trips_pt + \
                BETA_OCC_3_MOT * OCC_3 + \
                BETA_OCC_6_MOT * OCC_6

            # Associate the availability conditions with the alternatives. (Does not really apply on ToD modelling)
            av = {1: user_car_avail,
                  2: 1,
                  3: user_moto_avail}

            # Associate utility functions with the numbering of alternatives
            V = {1: V_CAR,
                 2: V_PT,
                 3: V_MOT}

        elif country == 'NL':
            ### Definition of utility functions - one for each alternative:
            V_CAR = ASC_CAR + \
                BETA_COST * trip_cost_car + \
                BETA_TIME * trip_dur_car + \
                BETA_S * trip_comfort_car

            V_PT = ASC_PT + \
                BETA_COST * trip_cost_pt + \
                BETA_TIME * trip_dur_pt + \
                BETA_S * trip_comfort_pt + \
                BETA_AGE_PT * AGE + \
                BETA_NCAR_PT * user_trips_car + \
                BETA_NPT_PT * user_trips_pt + \
                BETA_OCC_1_PT * OCC_1 + \
                BETA_OCC_3_PT * OCC_3 + \
                BETA_OCC_5_PT * OCC_5 + \
                BETA_OCC_6_PT * OCC_6

            V_BIKE = ASC_BIKE + \
                BETA_COST * trip_cost_bike + \
                BETA_TIME * trip_dur_bike + \
                BETA_S * trip_comfort_bike + \
                BETA_AGE_BIKE * AGE + \
                BETA_NCAR_BIKE * user_trips_car + \
                BETA_NPT_BIKE * user_trips_pt + \
                BETA_OCC_1_BIKE * OCC_1 + \
                BETA_OCC_3_BIKE * OCC_3 + \
                BETA_OCC_4_BIKE * OCC_4 + \
                BETA_OCC_5_BIKE * OCC_5 + \
                BETA_OCC_6_BIKE * OCC_6

            # Associate the availability conditions with the alternatives. (Does not really apply on ToD modelling)
            av = {1: user_car_avail,
                  2: 1,
                  3: user_bike_avail}

            # Associate utility functions with the numbering of alternatives
            V = {1: V_CAR,
                 2: V_PT,
                 3: V_BIKE}

        elif country == 'PT':
            ### Definition of utility functions - one for each alternative:
            V_CAR = ASC_CAR + \
                BETA_TIME * trip_dur_car + \
                BETA_COST * trip_cost_car

            V_PT = ASC_PT + \
                BETA_TIME * trip_dur_pt + \
                BETA_COST * trip_cost_pt + \
                BETA_NCAR_PT * user_trips_car + \
                BETA_NPT_PT * user_trips_pt + \
                BETA_OCC_3_PT * OCC_3

            V_MOT = ASC_MOT + \
                BETA_TIME * trip_dur_moto + \
                BETA_COST * trip_cost_moto + \
                BETA_AGE_MOT * AGE + \
                BETA_NCAR_MOT * user_trips_car + \
                BETA_NPT_MOT * user_trips_pt + \
                BETA_OCC_2_MOT * OCC_2 + \
                BETA_OCC_3_MOT * OCC_3 + \
                BETA_OCC_5_MOT * OCC_5

            # Associate the availability conditions with the alternatives. (Does not really apply on ToD modelling)
            av = {1: user_car_avail,
                  2: 1,
                  3: user_moto_avail}

            # Associate utility functions with the numbering of alternatives
            V = {1: V_CAR,
                 2: V_PT,
                 3: V_MOT}

        else:
            # NOTE(review): V and av are placeholder integers here, yet the
            # execution continues into bioLogLogit and models.logit below,
            # which receive dictionaries in the other branches -- confirm
            # whether an exception should be raised instead.
            V = 1
            av = 1
            print('There is no model specification for ', country)

        # The choice model is a log logit, with availability conditions
        logprob = bioLogLogit(util=V, av=av, choice=user_choice)
        biogeme = bio.BIOGEME(database=estimationdb, formulas=logprob)
        biogeme.modelName = "logitEstimation"

        # Create the outputs of the estimation and store in a namedtuple (= Model)
        results = biogeme.estimate()
        betas = results.getBetaValues()  # To be used later for the simulation of the model
        # One logit expression per alternative 1, 2 and 3
        structure = {1: models.logit(V, av, 1),
                     2: models.logit(V, av, 2),
                     3: models.logit(V, av, 3)}
        Output = collections.namedtuple('Output', ['betas', 'structure', 'results'])
        Model = Output(betas, structure, results)

        self.__cleanup_after_model_training()
        # print(self.evaluate_model(pandas_df_for_specified_country, Model))
        return Model
def apply_model_to_example(df_persons, betas, output_directory_for_simulation,
                           output_file_name):
    """Simulate the binary telecommuting logit model and save the result.

    :author: Antonin Danalet, based on the example '01logit_simul.py' by Michel Bierlaire, EPFL, on biogeme.epfl.ch

    Simulation with a binary logit model. Two alternatives: work from home
    at least some times (alternative 1), or not (alternative 0). The
    simulated probabilities of both alternatives are concatenated
    column-wise to ``df_persons`` and the result is written as a CSV file.

    NOTE: the business sector indicators are defined with explicit range
    tests. The previous chained form (``1 <= noga_08 <= 7``) is expanded by
    Python into ``(1 <= noga_08) and (noga_08 <= 7)``, which does not build
    a range expression on Biogeme expressions. If ``betas`` were estimated
    with a specification still using the chained form, that specification
    should be corrected and re-estimated as well.

    :param df_persons: observations for which the model is simulated.
    :type df_persons: pandas.DataFrame

    :param betas: parameter values, handed to ``biogeme.simulate`` as
        ``theBetaValues``.

    :param output_directory_for_simulation: directory of the output file.
        It is combined with ``output_file_name`` through the ``/``
        operator, so a ``pathlib.Path``-like object is presumably expected
        (TODO confirm against the callers).

    :param output_file_name: name of the CSV file to write.

    :return: nothing, the result is written to disk.
    """

    # Read the data
    database = db.Database('persons', df_persons)

    # The following statement allows you to use the names of the variable as Python variable.
    globals().update(database.variables)

    def noga_between(lower, upper):
        """Build the indicator of ``lower <= noga_08 <= upper``.

        Both bounds are included. The two comparisons are combined with
        ``&`` so that a single Biogeme expression is produced.
        """
        return (noga_08 >= lower) & (noga_08 <= upper)

    # Parameters of the model. Their values at simulation time are the
    # ones supplied through ``betas``.
    alternative_specific_constant = Beta('alternative_specific_constant', 0,
                                         None, None, 0)

    b_no_post_school_education = Beta('b_no_post_school_education', 0, None,
                                      None, 0)
    b_secondary_education = Beta('b_secondary_education', 0, None, None, 0)
    b_tertiary_education = Beta('b_tertiary_education', 0, None, None, 0)
    b_university = Beta('b_university', 0, None, None, 1)

    b_male = Beta('b_male', 0, None, None, 0)

    b_public_transport_connection_quality_are_a_home = Beta(
        'b_public_transport_connection_quality_are_a_home', 0, None, None, 1)
    b_public_transport_connection_quality_are_b_home = Beta(
        'b_public_transport_connection_quality_are_b_home', 0, None, None, 1)
    b_public_transport_connection_quality_are_c_home = Beta(
        'b_public_transport_connection_quality_are_c_home', 0, None, None, 1)
    b_public_transport_connection_quality_are_d_home = Beta(
        'b_public_transport_connection_quality_are_d_home', 0, None, None, 1)
    b_public_transport_connection_quality_are_na_home = Beta(
        'b_public_transport_connection_quality_are_na_home', 0, None, None, 0)

    b_urban_work = Beta('b_urban_work', 0, None, None, 1)
    b_rural_work = Beta('b_rural_work', 0, None, None, 0)
    b_intermediate_work = Beta('b_intermediate_work', 0, None, None, 1)

    b_home_work_distance = Beta('b_home_work_distance', 0, None, None, 0)

    b_business_sector_agriculture = Beta('b_business_sector_agriculture', 0,
                                         None, None, 0)
    b_business_sector_production = Beta('b_business_sector_production', 0,
                                        None, None, 0)
    b_business_sector_wholesale = Beta('b_business_sector_wholesale', 0, None,
                                       None, 1)
    b_business_sector_retail = Beta('b_business_sector_retail', 0, None, None,
                                    0)
    b_business_sector_gastronomy = Beta('b_business_sector_gastronomy', 0,
                                        None, None, 0)
    b_business_sector_finance = Beta('b_business_sector_finance', 0, None,
                                     None, 1)
    b_business_sector_services_fc = Beta('b_business_sector_services_fc', 0,
                                         None, None, 0)
    b_business_sector_other_services = Beta('b_business_sector_other_services',
                                            0, None, None, 1)
    b_business_sector_others = Beta('b_business_sector_others', 0, None, None,
                                    1)
    b_business_sector_non_movers = Beta('b_business_sector_non_movers', 0,
                                        None, None, 0)
    b_employees = Beta('b_employees', 0, None, None, 1)
    b_executives = Beta('b_executives', 0, None, None, 0)
    b_german = Beta('b_german', 0, None, None, 0)
    b_nationality_ch_germany_france_italy_nw_e = Beta(
        'b_nationality_ch_germany_france_italy_nw_e', 0, None, None, 1)
    b_nationality_south_west_europe = Beta('b_nationality_south_west_europe',
                                           0, None, None, 1)
    b_nationality_southeast_europe = Beta('b_nationality_southeast_europe', 0,
                                          None, None, 1)
    b_hh_income_na = Beta('B_hh_income_na', 0, None, None, 1)
    b_hh_income_8000_or_less = Beta('b_hh_income_8000_or_less', 0, None, None,
                                    0)
    b_hh_income_more_than_8000 = Beta('b_hh_income_more_than_8000', 0, None,
                                      None, 1)

    # Definition of new variables
    no_post_school_educ = ((highest_educ == 1) | (highest_educ == 2) |
                           (highest_educ == 3) | (highest_educ == 4))
    secondary_education = ((highest_educ == 5) | (highest_educ == 6) |
                           (highest_educ == 7) | (highest_educ == 8) |
                           (highest_educ == 9) | (highest_educ == 10) |
                           (highest_educ == 11) | (highest_educ == 12))
    tertiary_education = ((highest_educ == 13) | (highest_educ == 14) |
                          (highest_educ == 15) | (highest_educ == 16))
    university = (highest_educ == 17)

    male = (sex == 1)

    public_transport_connection_quality_ARE_A_home = (
        public_transport_connection_quality_ARE_home == 1)
    public_transport_connection_quality_ARE_B_home = (
        public_transport_connection_quality_ARE_home == 2)
    public_transport_connection_quality_ARE_C_home = (
        public_transport_connection_quality_ARE_home == 3)
    public_transport_connection_quality_ARE_D_home = (
        public_transport_connection_quality_ARE_home == 4)
    public_transport_connection_quality_ARE_NA_home = (
        public_transport_connection_quality_ARE_home == 5)

    urban_work = (urban_typology_work == 1)
    rural_work = (urban_typology_work == 3)
    intermediate_work = (urban_typology_work == 2)

    # Negative distances contribute zero; the distance is divided by 100000.
    home_work_distance = (home_work_crow_fly_distance *
                          (home_work_crow_fly_distance >= 0.0) / 100000.0)

    # Business sectors, defined from ranges of the variable noga_08
    business_sector_agriculture = DefineVariable(
        'business_sector_agriculture', noga_between(1, 7), database)
    business_sector_retail = DefineVariable(
        'business_sector_retail', noga_08 == 47, database)
    business_sector_gastronomy = DefineVariable(
        'business_sector_gastronomy', noga_between(55, 57), database)
    business_sector_finance = DefineVariable(
        'business_sector_finance', noga_between(64, 67), database)
    business_sector_production = DefineVariable(
        'business_sector_production',
        noga_between(10, 35) | noga_between(40, 44), database)
    business_sector_wholesale = DefineVariable(
        'business_sector_wholesale',
        (noga_08 == 45) | noga_between(49, 54), database)
    business_sector_services_fC = DefineVariable(
        'business_sector_services_fC',
        noga_between(60, 63) | noga_between(69, 83) | (noga_08 == 58),
        database)
    business_sector_other_services = DefineVariable(
        'business_sector_other_services',
        noga_between(86, 90) | noga_between(92, 96) |
        (noga_08 == 59) | (noga_08 == 68), database)
    business_sector_others = DefineVariable(
        'business_sector_others', noga_between(97, 98), database)
    business_sector_non_movers = DefineVariable(
        'business_sector_non_movers',
        noga_between(8, 9) | noga_between(36, 39) | noga_between(84, 85) |
        (noga_08 == 91) | (noga_08 == 99), database)

    employees = work_position == 2
    executives = work_position == 1

    german = language == 1

    # Nationality groups, as sums of indicators of nation codes
    nationality_switzerland = nation == 8100
    nationality_germany_austria_lichtenstein = (
        (nation == 8207) + (nation == 8229) + (nation == 8222))
    nationality_italy_vatican = (nation == 8218) + (nation == 8241)
    nationality_france_monaco_san_marino = (
        (nation == 8212) + (nation == 8226) + (nation == 8233))
    nationality_northwestern_europe = (
        (nation == 8204) + (nation == 8223) + (nation == 8227) +
        (nation == 8206) + (nation == 8211) + (nation == 8215) +
        (nation == 8216) + (nation == 8217) + (nation == 8228) +
        (nation == 8234))
    nationality_south_west_europe = (
        (nation == 8231) + (nation == 8236) + (nation == 8202))
    nationality_southeast_europe = (
        (nation == 8224) + (nation == 8201) + (nation == 8214) +
        (nation == 8256) + (nation == 8250) + (nation == 8251) +
        (nation == 8252) + (nation == 8255) + (nation == 8205) +
        (nation == 8239) + (nation == 8242) + (nation == 8248) +
        (nation == 8254))
    nationality_eastern_europe = (
        (nation == 8230) + (nation == 8232) + (nation == 8240) +
        (nation == 8243) + (nation == 8244) + (nation == 8263) +
        (nation == 8265) + (nation == 8266) + (nation == 8260) +
        (nation == 8261) + (nation == 8262))

    # Work percentage, capped at 100. The percentage of a second part-time
    # job is deliberately not included.
    work_percentage = DefineVariable(
        'work_percentage',
        bioMin(
            (full_part_time_job == 1) * 100 + percentage_first_part_time_job *
            (percentage_first_part_time_job > 0),
            100),
        database)

    hh_income_na = hh_income == -98
    hh_income_less_than_2000 = hh_income == 1
    hh_income_2000_to_4000 = hh_income == 2
    hh_income_4001_to_6000 = hh_income == 3
    hh_income_6001_to_8000 = hh_income == 4
    hh_income_8001_to_10000 = hh_income == 5
    hh_income_10001_to_12000 = hh_income == 6
    hh_income_12001_to_14000 = hh_income == 7
    hh_income_14001_to_16000 = hh_income == 8
    hh_income_more_than_16000 = hh_income == 9

    #  Utility
    U = (
        alternative_specific_constant
        + b_executives * executives
        + b_employees * employees
        + b_no_post_school_education * no_post_school_educ
        + b_secondary_education * secondary_education
        + b_tertiary_education * tertiary_education
        + b_university * university
        + b_male * male
        + b_public_transport_connection_quality_are_a_home * public_transport_connection_quality_ARE_A_home
        + b_public_transport_connection_quality_are_b_home * public_transport_connection_quality_ARE_B_home
        + b_public_transport_connection_quality_are_c_home * public_transport_connection_quality_ARE_C_home
        + b_public_transport_connection_quality_are_d_home * public_transport_connection_quality_ARE_D_home
        + b_public_transport_connection_quality_are_na_home * public_transport_connection_quality_ARE_NA_home
        + b_urban_work * urban_work
        + b_rural_work * rural_work
        + b_intermediate_work * intermediate_work
        + b_home_work_distance * home_work_distance
        + models.piecewiseFormula(age, [0, 20, 35, 75, 200])
        + b_business_sector_agriculture * business_sector_agriculture
        + b_business_sector_retail * business_sector_retail
        + b_business_sector_gastronomy * business_sector_gastronomy
        + b_business_sector_finance * business_sector_finance
        + b_business_sector_production * business_sector_production
        + b_business_sector_wholesale * business_sector_wholesale
        + b_business_sector_services_fc * business_sector_services_fC
        + b_business_sector_other_services * business_sector_other_services
        + b_business_sector_others * business_sector_others
        + b_business_sector_non_movers * business_sector_non_movers
        + b_german * german
        + b_nationality_ch_germany_france_italy_nw_e * nationality_switzerland
        + b_nationality_ch_germany_france_italy_nw_e * nationality_germany_austria_lichtenstein
        + b_nationality_ch_germany_france_italy_nw_e * nationality_italy_vatican
        + b_nationality_ch_germany_france_italy_nw_e * nationality_france_monaco_san_marino
        + b_nationality_ch_germany_france_italy_nw_e * nationality_northwestern_europe
        + b_nationality_south_west_europe * nationality_south_west_europe
        + b_nationality_southeast_europe * nationality_southeast_europe
        + b_nationality_ch_germany_france_italy_nw_e * nationality_eastern_europe
        + models.piecewiseFormula(work_percentage, [0, 90, 101])
        + b_hh_income_na * hh_income_na
        + b_hh_income_8000_or_less * hh_income_less_than_2000
        + b_hh_income_8000_or_less * hh_income_2000_to_4000
        + b_hh_income_8000_or_less * hh_income_4001_to_6000
        + b_hh_income_8000_or_less * hh_income_6001_to_8000
        + b_hh_income_more_than_8000 * hh_income_8001_to_10000
        + b_hh_income_more_than_8000 * hh_income_10001_to_12000
        + b_hh_income_more_than_8000 * hh_income_12001_to_14000
        + b_hh_income_more_than_8000 * hh_income_14001_to_16000
        + b_hh_income_more_than_8000 * hh_income_more_than_16000
    )
    U_No_telecommuting = 0

    # Associate utility functions with the numbering of alternatives
    V = {
        1: U,  # Yes or sometimes
        0: U_No_telecommuting  # No
    }

    # Both alternatives are always available
    av = {1: 1, 0: 1}

    # The choice model is a logit, with availability conditions
    prob_telecommuting = models.logit(V, av, 1)
    prob_no_telecommuting = models.logit(V, av, 0)

    simulate = {
        'Prob. telecommuting': prob_telecommuting,
        'Prob. no telecommuting': prob_no_telecommuting
    }

    # Create the Biogeme object
    biogeme = bio.BIOGEME(database, simulate)
    biogeme.modelName = 'logit_telecommuting_simul'

    results = biogeme.simulate(theBetaValues=betas)
    # Append the simulated probabilities as new columns
    df_persons = pd.concat([df_persons, results], axis=1)

    # Save the file
    df_persons.to_csv(output_directory_for_simulation / output_file_name,
                      sep=',',
                      index=False)
:author: Michel Bierlaire, EPFL
:date: Wed Sep 11 08:22:28 2019

"""
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.distributions as dist
import biogeme.results as res
from biogeme.expressions import Beta, DefineVariable, Elem, Integrate, Numeric, RandomVariable, bioNormalCdf, exp, log

# Load the tab-separated Optima data set and wrap it in a Biogeme database
df = pd.read_csv("optima.dat", sep='\t')
database = db.Database("optima", df)

# Publish every database column as a module-level Python name, so that
# columns such as Choice or CalculatedIncome can be used directly below.
globals().update(database.variables)

# Drop the observations whose chosen alternative is coded -1
exclude = Choice == -1.0
database.remove(exclude)

### Variables

# Income divided by 1000, registered in the database under 'ScaledIncome'
ScaledIncome = DefineVariable(
    'ScaledIncome', CalculatedIncome / 1000, database
)
# Break points handed to models.piecewiseVariables for the scaled income
thresholds = [0, 4, 6, 8, 10, None]
ContIncome = models.piecewiseVariables(ScaledIncome, thresholds)
Exemple #23
0
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.messaging as msg
from biogeme.expressions import Beta, DefineVariable, bioDraws, \
    PanelLikelihoodTrajectory, MonteCarlo, log

# Read the data (tab-separated). The separator is passed by keyword:
# pandas >= 2.0 no longer accepts it as a positional argument.
df = pd.read_csv('outside.dat', sep='\t')
database = db.Database('outside', df)
# The data are organized as panel data. The variable ID identifies each
# individual.
database.panel("ID")
# Make the names of the database variables usable as Python variables.
globals().update(database.variables)

# Parameters to be estimated
# (the last argument of Beta is the status flag; ASC_1 is created with
# status 1, all the other constants with status 0)
ASC_1 = Beta('ASC_1', 0, None, None, 1)
ASC_11 = Beta('ASC_11', 0, None, None, 0)
ASC_2 = Beta('ASC_2', 0, None, None, 0)
ASC_21 = Beta('ASC_21', 0, None, None, 0)
ASC_3 = Beta('ASC_3', 0, None, None, 0)
ASC_31 = Beta('ASC_31', 0, None, None, 0)
ASC_4 = Beta('ASC_4', 0, None, None, 0)
# Shared error parameters, fix the mean-parameter to 0
SIGMA_SH_MAAS_M = Beta('SIGMA_SH_MAAS_M', 0, None, None, 1)
SIGMA_SH_MAAS_STD = Beta('SIGMA_SH_MAAS_STD', 0, None, None, 0)
# Random term: mean + standard deviation * normal draws
SIGMA_SH_MAASRND = SIGMA_SH_MAAS_M + SIGMA_SH_MAAS_STD * bioDraws(
    'SIGMA_SH_MAASRND', 'NORMAL')

beta_fam_package = Beta('beta_fam_package', 0, None, None, 0)
beta_fam_private = Beta('beta_fam_private', 0, None, None, 0)
Exemple #24
0
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.messaging as msg
from biogeme.expressions import Beta, DefineVariable

# Read the data (tab-separated). The separator is passed by keyword:
# pandas >= 2.0 no longer accepts it as a positional argument.
df = pd.read_csv('comboall.dat', sep='\t')
database = db.Database('comboall', df)

# The Pandas data structure is available as database.data. Use all the
# Pandas functions to investigate the database
#print(database.data.describe())

# The following statement allows you to use the names of the variable
# as Python variable.
globals().update(database.variables)

# Removing some observations can be done directly using pandas.
#remove = (((database.data.PURPOSE != 1) &
#           (database.data.PURPOSE != 3)) |
#          (database.data.CHOICE == 0))
#database.data.drop(database.data[remove].index,inplace=True)

# Parameters to be estimated
ASC_1 = Beta('ASC_1', 0, None, None, 0)
ASC_11 = Beta('ASC_11', 0, None, None, 0)
ASC_2 = Beta('ASC_2', 0, None, None, 0)
ASC_21 = Beta('ASC_21', 0, None, None, 0)
ASC_3 = Beta('ASC_3', 0, None, None, 0)
# Translated to .py by Meritxell Pacheco
# 2017
# Adapted to PandasBiogeme by Michel Bierlaire
# Sun Oct 21 23:18:39 2018
# Revised by Nicola Ortelli
# Sept 2020

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, DefineVariable, log
from biogeme.models import loglogit

# Load the tab-separated airline data set and wrap it in a Biogeme database
pandas = pd.read_csv("airline.dat", sep='\t')
database = db.Database("airline", pandas)
# Print floating point numbers with three significant digits
pd.options.display.float_format = '{:.3g}'.format

# Publish every database column as a module-level Python name
globals().update(database.variables)

# Discard the observations where ArrivalTimeHours_1 is coded -1
exclude = ArrivalTimeHours_1 == -1
database.remove(exclude)

# Number of the chosen alternative, combined from the three
# BestAlternative_* columns (presumably 0/1 indicators -- TODO confirm)
chosenAlternative = (
    (BestAlternative_1 * 1)
    + (BestAlternative_2 * 2)
    + (BestAlternative_3 * 3)
)

# Parameters to be estimated
# Arguments:
#   1  Name for report. Typically, the same as the variable
#   2  Starting value
Exemple #26
0
#
# Not needed in test
# pylint: disable=missing-function-docstring, missing-class-docstring

import pandas as pd
import biogeme.database as db

def _build_frame(variable1, variable2):
    """Return the five-row test frame; only the two variables differ."""
    return pd.DataFrame({
        'Person': [1, 1, 1, 2, 2],
        'Exclude': [0, 0, 1, 0, 1],
        'Variable1': variable1,
        'Variable2': variable2,
        'Choice': [1, 2, 3, 1, 2],
        'Av1': [0, 1, 1, 1, 1],
        'Av2': [1, 1, 1, 1, 1],
        'Av3': [0, 1, 1, 1, 1]
    })


# First test database
df1 = _build_frame([1, 2, 3, 4, 5], [10, 20, 30, 40, 50])
myData1 = db.Database('test', df1)

# Second test database: same layout, variables multiplied by ten
df2 = _build_frame([10, 20, 30, 40, 50], [100, 200, 300, 400, 500])
myData2 = db.Database('test', df2)
Exemple #27
0
Estimation of a mixture of logit models where the integral is
approximated using MonteCarlo integration, with Modified Latin
Hypercube Sampling draws.

"""

# pylint: disable=invalid-name, undefined-variable

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
from biogeme.expressions import Beta, bioDraws, MonteCarlo, log

# Load the tab-separated Swissmetro data set into a Biogeme database
pandas = pd.read_csv('swissmetro.dat', sep='\t')
database = db.Database('swissmetro', pandas)

# Publish every database column as a module-level Python name, so that
# PURPOSE and CHOICE can be used directly below.
globals().update(database.variables)

# Exclude an observation when PURPOSE is neither 1 nor 3, or when CHOICE is 0
exclude = (
    (PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)
) > 0
database.remove(exclude)

# Model parameters: Beta(name, initial value, lower bound, upper bound, status)
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
ASC_SM = Beta('ASC_SM', 0, None, None, 1)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_TIME_S = Beta('B_TIME_S', 1, None, None, 0)
B_COST = Beta('B_COST', 0, None, None, 0)
Exemple #28
0
def run_simulation(data_file_directory_for_simulation, data_file_name_for_simulation, output_directory_for_simulation,
                   betas, household_income_limit):
    """Simulate a binary logit telecommuting model on a file of persons.

    :author: Antonin Danalet, based on the example '01logit_simul.py' by Michel Bierlaire, EPFL, on biogeme.epfl.ch

    Two alternatives: work from home at least some times, or not. The
    function reads the persons file, computes both choice probabilities with
    the supplied parameter values, forces the probability of telecommuting to
    0 for some groups (see below), draws one realisation per person and
    writes the enriched table to
    'persons_from_SynPop_with_probability_telecommuting.csv' in the output
    directory.

    :param data_file_directory_for_simulation: directory of the input file;
        combined with the file name through the ``/`` operator.
    :param data_file_name_for_simulation: name of the ';'-separated input file.
    :param output_directory_for_simulation: directory used as working
        directory during the simulation and as destination of the output
        file; combined with the file name through the ``/`` operator.
    :param betas: parameter values handed to ``biogeme.simulate``.
    :param household_income_limit: persons with ``hh_income`` strictly below
        this value get the ``hh_income_8000_or_less`` indicator.
    :return: None. The result is written to disk.
    """

    # Read the data. The separator is passed by keyword: pandas >= 2.0 no
    # longer accepts it as a positional argument.
    df_persons = pd.read_csv(data_file_directory_for_simulation / data_file_name_for_simulation, sep=';')
    database = db.Database('persons', df_persons)

    # The following statement allows you to use the names of the variable as Python variable.
    globals().update(database.variables)

    # Parameters of the model (their values are supplied through `betas`)
    alternative_specific_constant = Beta('alternative_specific_constant', 0, None, None, 0)
    b_no_post_school_education = Beta('b_no_post_school_education', 0, None, None, 0)
    b_secondary_education = Beta('b_secondary_education', 0, None, None, 0)
    b_tertiary_education = Beta('b_tertiary_education', 0, None, None, 0)
    b_university = Beta('b_university', 0, None, None, 1)
    b_male = Beta('b_male', 0, None, None, 0)
    b_public_transport_connection_quality_na_home = Beta('b_public_transport_connection_quality_na_home',
                                                         0, None, None, 0)
    b_public_transport_connection_quality_a_work = Beta('b_public_transport_connection_quality_are_a_work',
                                                        0, None, None, 1)
    b_rural_work = Beta('b_rural_work', 0, None, None, 0)
    b_home_work_distance = Beta('b_home_work_distance', 0, None, None, 0)
    b_business_sector_agriculture = Beta('b_business_sector_agriculture', 0, None, None, 0)
    b_business_sector_production = Beta('b_business_sector_production', 0, None, None, 0)
    b_business_sector_wholesale = Beta('b_business_sector_wholesale', 0, None, None, 1)
    b_business_sector_retail = Beta('b_business_sector_retail', 0, None, None, 0)
    b_business_sector_gastronomy = Beta('b_business_sector_gastronomy', 0, None, None, 0)
    b_business_sector_finance = Beta('b_business_sector_finance', 0, None, None, 1)
    b_business_sector_services_fc = Beta('b_business_sector_services_fc', 0, None, None, 0)
    b_business_sector_other_services = Beta('b_business_sector_other_services', 0, None, None, 1)
    b_business_sector_others = Beta('b_business_sector_others', 0, None, None, 1)
    b_business_sector_non_movers = Beta('b_business_sector_non_movers', 0, None, None, 0)
    b_executives = Beta('b_executives', 0, None, None, 0)
    b_german = Beta('b_german', 0, None, None, 0)
    # NOTE(review): this coefficient is used in the utility below but was not
    # defined in this function, which raises a NameError. It is declared here
    # as a free parameter, so `betas` must contain its value -- confirm the
    # status flag against the estimation script.
    b_nationality_ch_germany_france_italy_nw_e = Beta('b_nationality_ch_germany_france_italy_nw_e',
                                                      0, None, None, 0)
    b_hh_income_8000_or_less = Beta('b_hh_income_8000_or_less', 0, None, None, 0)

    # Definition of new variables
    no_post_school_educ = education == 1
    secondary_education = education == 2
    tertiary_education = education == 3
    university = education == 4

    male = (sex == 1)

    public_transport_quality_NA_home = (public_transport_connection_quality_ARE_home == 5)
    public_transport_quality_A_work = (public_transport_connection_quality_ARE_work == 1)

    # Negative distances are zeroed out by the (>= 0.0) factor, then the value is rescaled
    home_work_distance = (home_work_crow_fly_distance * (home_work_crow_fly_distance >= 0.0) / 100000.0)

    business_sector_agriculture = type_1 == 1
    business_sector_retail = type_1 == 4
    business_sector_gastronomy = type_1 == 5
    business_sector_finance = type_1 == 6
    business_sector_production = type_1 == 2
    business_sector_wholesale = type_1 == 3
    business_sector_services_fC = type_1 == 7
    business_sector_other_services = type_1 == 8
    business_sector_others = type_1 == 9
    business_sector_non_movers = type_1 == 10
    german = language == 1
    nationality_switzerland = nation == 0
    nationality_germany_austria = nation == 1
    nationality_italy_vatican = nation == 2
    nationality_france_monaco_s_marino = nation == 3
    nationality_northwestern_europe = nation == 4
    nationality_eastern_europe = nation == 7
    hh_income_8000_or_less = hh_income < household_income_limit
    executives = (0 < position_in_bus) * (position_in_bus < 19)
    rural_work = urban_rural_typology_work == 3

    #  Utility
    utility_function_telecommuting = (
        alternative_specific_constant
        + b_executives * executives
        + b_no_post_school_education * no_post_school_educ
        + b_secondary_education * secondary_education
        + b_tertiary_education * tertiary_education
        + b_university * university
        + b_male * male
        + b_public_transport_connection_quality_na_home * public_transport_quality_NA_home
        + b_public_transport_connection_quality_a_work * public_transport_quality_A_work
        + b_rural_work * rural_work
        + b_home_work_distance * home_work_distance
        + models.piecewiseFormula(age, [0, 20, 35, 75, 200])
        + b_business_sector_agriculture * business_sector_agriculture
        + b_business_sector_retail * business_sector_retail
        + b_business_sector_gastronomy * business_sector_gastronomy
        + b_business_sector_finance * business_sector_finance
        + b_business_sector_production * business_sector_production
        + b_business_sector_wholesale * business_sector_wholesale
        + b_business_sector_services_fc * business_sector_services_fC
        + b_business_sector_other_services * business_sector_other_services
        + b_business_sector_others * business_sector_others
        + b_business_sector_non_movers * business_sector_non_movers
        + b_german * german
        + b_nationality_ch_germany_france_italy_nw_e * nationality_switzerland
        + b_nationality_ch_germany_france_italy_nw_e * nationality_germany_austria
        + b_nationality_ch_germany_france_italy_nw_e * nationality_italy_vatican
        + b_nationality_ch_germany_france_italy_nw_e * nationality_france_monaco_s_marino
        + b_nationality_ch_germany_france_italy_nw_e * nationality_northwestern_europe
        + b_nationality_ch_germany_france_italy_nw_e * nationality_eastern_europe
        + models.piecewiseFormula(work_percentage, [0, 90, 101])
        + b_hh_income_8000_or_less * hh_income_8000_or_less
    )
    utility_function_no_telecommuting = 0

    # Associate utility functions with the numbering of alternatives
    utility_functions_with_numbering_of_alternatives = {1: utility_function_telecommuting,  # Yes or sometimes
                                                        3: utility_function_no_telecommuting}  # No

    availability_conditions = {1: 1,  # Always available
                               3: 1}  # Always available

    # The choice model is a logit, with availability conditions
    prob_telecommuting = models.logit(utility_functions_with_numbering_of_alternatives, availability_conditions, 1)
    prob_no_telecommuting = models.logit(utility_functions_with_numbering_of_alternatives, availability_conditions, 3)

    simulate = {'Prob. telecommuting': prob_telecommuting,
                'Prob. no telecommuting': prob_no_telecommuting}

    # Create the Biogeme object
    biogeme = bio.BIOGEME(database, simulate)
    biogeme.modelName = 'logit_telecommuting_simul'

    # Define level of verbosity (alternatives: setSilent, setGeneral, setDetailed)
    logger = msg.bioMessage()
    logger.setWarning()

    # Change the working directory, so that biogeme writes in the correct folder.
    # The try/finally guarantees that the previous working directory is
    # restored even when the simulation raises.
    standard_directory = os.getcwd()
    os.chdir(output_directory_for_simulation)
    try:
        results = biogeme.simulate(theBetaValues=betas)
    finally:
        # Go back to the normal working directory
        os.chdir(standard_directory)

    # Append the simulated probabilities as new columns of the persons table
    df_persons = pd.concat([df_persons, results], axis=1)

    # For unemployed people, fix probability of doing some home office to 0 (and probability of not doing to 1).
    df_persons.loc[df_persons.employed == 0, 'Prob. telecommuting'] = 0.0  # Unemployed people
    df_persons.loc[df_persons.employed == 0, 'Prob. no telecommuting'] = 1.0  # Unemployed people
    df_persons.loc[df_persons.employed == -99, 'Prob. telecommuting'] = 0.0  # Other people
    df_persons.loc[df_persons.employed == -99, 'Prob. no telecommuting'] = 1.0  # Other people
    # By definition, apprentices don't work from home (because they were not asked in the MTMC)
    df_persons.loc[df_persons.position_in_bus == 3, 'Prob. telecommuting'] = 0.0
    df_persons.loc[df_persons.position_in_bus == 3, 'Prob. no telecommuting'] = 1.0

    # Add a realisation of the probability: one uniform draw per person,
    # compared with the probability of telecommuting
    df_persons['random 0/1'] = np.random.rand(len(df_persons))
    df_persons['telecommuting_model'] = np.where(df_persons['random 0/1'] < df_persons['Prob. telecommuting'], 1, 0)
    del df_persons['random 0/1']

    # Save the file
    data_file_name = 'persons_from_SynPop_with_probability_telecommuting.csv'
    df_persons.to_csv(output_directory_for_simulation / data_file_name, sep=',', index=False)
Exemple #29
0
 Example of a logit model, with a Box-Cox transform of variables.
 Three alternatives: Train, Car and Swissmetro
 SP data
"""

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.messaging as msg
from biogeme.expressions import Beta, DefineVariable

# Read the data (tab-separated). The separator is passed by keyword:
# pandas >= 2.0 no longer accepts it as a positional argument.
df = pd.read_csv('swissmetro.dat', sep='\t')
database = db.Database('swissmetro', df)

# The Pandas data structure is available as database.data. Use all the
# Pandas functions to investigate the database
#print(database.data.describe())

# The following statement allows you to use the names of the variable
# as Python variable.
globals().update(database.variables)

# Removing some observations can be done directly using pandas.
#remove = (((database.data.PURPOSE != 1) &
#           (database.data.PURPOSE != 3)) |
#          (database.data.CHOICE == 0))
#database.data.drop(database.data[remove].index,inplace=True)
Exemple #30
0
from pathlib import Path
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)

mdata_folder = Path("C:/Users/langzx/Desktop/github/DCM/data")
import biogeme.database as db
import biogeme.biogeme as bio

# Load the wide-format data file from the data folder
pandas = pd.read_table(mdata_folder / "widedata.dat")
# Replace every missing value by the code -2
pandas = pandas.fillna(-2)
database = db.Database("widedata", pandas)
# NOTE(review): the three statements below are bare expressions whose values
# are discarded when this file runs as a script; they look like leftovers
# from an interactive session -- confirm before removing.
database
database.data.describe()
pandas.columns.values

from headers import *

# Removing some observations can be done directly using pandas:
# drop the rows whose Choice carries the code -2.
remove = (database.data.Choice == -2)
database.data.drop(database.data[remove].index, inplace=True)

### Coefficients
# Beta(name, initial value, lower bound, upper bound, status)
ASC_V = Beta('ASC_V', 0.0, -1000, 1000, 0)
ASC_C = Beta('ASC_C', 0.0, -1000, 1000, 0)
coef_wetland = Beta('coef_wetland', 0.0, -1000, 1000, 0)
coef_cc = Beta('coef_cc', 0.0, -1000, 1000, 0)
# The parameter name previously ended with a stray space ('coef_nm '), which
# made the reported name differ from the Python variable and its siblings.
coef_nm = Beta('coef_nm', 0.0, -1000, 1000, 0)
coef_pay = Beta('coef_pay', 0.0, -1000, 1000, 0)

### Variables