def validate(self, estimationResults, slices=5):
    """Perform out-of-sample validation.

    The function performs the following tasks:

      - it shuffles the data set,
      - it splits the data set into slices of (approximately) the same
        size,
      - each slice defines a validation set (the slice itself)
        and an estimation set (the rest of the data),
      - the model is re-estimated on the estimation set,
      - the estimated model is applied on the validation set,
      - the value of the log likelihood for each observation is reported.

    The database attached to the object is replaced by each estimation
    set in turn. The original database is put back before the function
    returns, including when a re-estimation or a simulation raises.

    :param estimationResults: results of the model estimation based on the
        full data.
    :type estimationResults: biogeme.results.bioResults

    :param slices: number of slices.
    :type slices: int

    :return: a list containing as many items as slices. Each item
             is the result of the simulation on the validation set.
    :rtype: list(pandas.DataFrame)

    :raise biogemeError: if the database contains panel data.
    """
    if self.database.isPanel():
        raise excep.biogemeError(
            'Validation for panel data is not yet implemented')

    # Split the database
    validationData = self.database.split(slices)

    keepDatabase = self.database

    allSimulationResults = []
    try:
        for v in validationData:
            # v[0] is the estimation data set, v[1] the validation data set
            self.database = db.Database('Estimation data', v[0])
            self.loglike.changeInitValues(estimationResults.getBetaValues())
            results = self.estimate()
            simulate = {'Loglikelihood': self.loglike}
            simBiogeme = BIOGEME(
                db.Database('Validation data', v[1]), simulate)
            simResult = simBiogeme.simulate(results.getBetaValues())
            allSimulationResults.append(simResult)
    finally:
        # The database was swapped for each slice: always restore the
        # full data set, even if an estimation or a simulation failed.
        self.database = keepDatabase

    if self.generatePickle:
        fname = f'{self.modelName}_validation'
        pickleFileName = bf.getNewFileName(fname, 'pickle')
        with open(pickleFileName, 'wb') as f:
            pickle.dump(allSimulationResults, f)
        self.logger.general(
            f'Simulation results saved in file {pickleFileName}')

    return allSimulationResults
def prep_db(self):
    """Create the Swissmetro database and one Variable attribute per column.

    The data frame ``self.df`` is used when it is not None; otherwise the
    file ``GEV_SM/swissmetro.dat`` located under ``self.data_folder`` is
    read with ``pandas.read_table``. The resulting Biogeme database is
    stored in ``self.database`` and, for every column of its data, an
    attribute with the column name is set on ``self`` holding
    ``Variable(<column name>)``.
    """
    if self.df is not None:
        self.database = db.Database('swissmetro', self.df)
    else:
        data = pd.read_table(self.data_folder + 'GEV_SM/swissmetro.dat')
        self.database = db.Database('swissmetro', data)

    for col in self.database.data.columns:
        # setattr replaces the former exec() of a string-built statement:
        # column names come from a data file and must never be evaluated
        # as source code (and need not be valid Python identifiers).
        setattr(self, col, Variable(col))
def evaluate_model(self, pandas_df_for_specified_country, model):
    """Simulate ``model`` on the given data and summarise its fit.

    The formulas in ``model.structure`` are simulated with ``model.betas``
    on ``pandas_df_for_specified_country``. For each row the label of the
    column with the largest simulated value is taken as the simulated
    choice and compared with the ``user_choice`` column of the input.
    (This presumes the keys of ``model.structure`` are the choice codes
    used in ``user_choice`` -- to be confirmed against the caller.)

    :return: dict with two formatted strings: ``'Model prediction
        accuracy'`` (share of rows predicted correctly, as a percentage
        with one decimal) and ``'Rho-square'`` (first element of the
        ``'Rho-square-bar for the init. model'`` general statistic, with
        three significant digits).
    """

    def print_mode_shares(modename, modenumber):
        # Debugging helper (only referenced by the commented-out calls
        # below): prints observed vs simulated share of one alternative.
        n_rows = simresults.shape[0]
        n_real = int((simresults['Actual choice'] == modenumber).sum())
        n_simu = int((simresults['Simulated choice'] == modenumber).sum())
        shares = (modename,
                  '--> Real:' + format(n_real / n_rows, '.1%')
                  + '| Simu:' + format(n_simu / n_rows, '.1%'))
        print(shares)

    # Estimation of probabilities for each alternative on aggregate.
    # Simulate / forecast.
    estimation_db = db.Database('estimationdb',
                                pandas_df_for_specified_country)
    biosim = bio.BIOGEME(estimation_db, model.structure)
    biosim.modelName = "simulated_model"
    simresults = biosim.simulate(model.betas)

    # Suggestion from the model: label of the best-scoring column per row.
    simresults['Simulated choice'] = simresults.idxmax(axis=1)
    # Actual choice of the individual.
    observed = pandas_df_for_specified_country['user_choice'].to_numpy()
    simresults['Actual choice'] = observed
    # Predicted vs RP choice (correct prediction = 1, wrong prediction = 0).
    is_hit = simresults['Simulated choice'] == simresults['Actual choice']
    simresults['Correct prediction'] = np.where(is_hit, 1, 0)

    #print_mode_shares('Depart earlier', 1)
    #print_mode_shares('Depart on-time', 2)
    #print_mode_shares('Depart later  ', 3)

    accuracy = simresults['Correct prediction'].mean()
    statistics = model.results.getGeneralStatistics()
    rho_square = statistics['Rho-square-bar for the init. model'][0]
    return {'Model prediction accuracy': format(accuracy, '.1%'),
            'Rho-square': format(rho_square, '.3')}
def predict(self, trip_data, model_for_specified_country):
    """Simulate the mode-choice model for a trip and return its outputs.

    ``trip_data`` is modified in place: indicator columns ``OCC_1`` to
    ``OCC_6`` (1 where ``user_occupation`` equals the number, else 0) and
    an ``AGE`` column derived from ``user_birthday`` are added before the
    simulation.

    :return: dict with keys ``'mod_car'``, ``'mod_pt'`` and
        ``'mod_motbike'`` holding the first three simulated values of the
        first row of the simulation results.
    """
    occupation = trip_data['user_occupation']
    for code in range(1, 7):
        trip_data[f'OCC_{code}'] = np.where(occupation == code, 1, 0)
    trip_data['AGE'] = self.__birthday_to_age(trip_data['user_birthday'])

    # Biogeme only works on its own database type, so wrap the trip in one.
    tripdb = db.Database("SuggestionDB", trip_data)

    # Simulate / forecast
    biosuggest = bio.BIOGEME(tripdb, model_for_specified_country.structure)
    biosuggest.modelName = "suggestion_to_user"
    betas = model_for_specified_country.betas
    suggestionresults = biosuggest.simulate(betas)

    # Label of the column with the max value: My-TRAC's recommendation.
    # It is stored as an extra (fourth) column of the results.
    suggestionresults['Recommendation'] = suggestionresults.idxmax(axis=1)

    first_row = suggestionresults.values[0]
    # print('Trip data = ', trip_data.to_json())
    # print('Results = ',
    #       {'CAR': "{0:.1%}".format(suggestion[0]),
    #        'PT': "{0:.1%}".format(suggestion[1]),
    #        'BIKE/MOTO': "{0:.1%}".format(suggestion[2]),
    #        'My-TRAC recommendation': int(suggestion[3])})
    result = {}
    result['mod_car'] = first_row[0]
    result['mod_pt'] = first_row[1]
    result['mod_motbike'] = first_row[2]
    return result
def predict(self, trip_data, model_for_specified_country):
    """Simulate the departure-time model for a trip and return its outputs.

    ``trip_data`` is modified in place: an ``AGE`` column derived from
    ``user_birthday`` is added before the simulation.

    :return: dict with keys ``'tod_earlier'``, ``'tod_ontime'`` and
        ``'tod_later'`` holding the first three simulated values of the
        first row of the simulation results.
    """
    trip_data['AGE'] = self.__birthday_to_age(trip_data['user_birthday'])

    # Biogeme only works on its own database type, so wrap the trip in one.
    tripdb = db.Database("SuggestionDB", trip_data)

    # Simulate / forecast
    biosuggest = bio.BIOGEME(tripdb, model_for_specified_country.structure)
    biosuggest.modelName = "suggestion_to_user"
    betas = model_for_specified_country.betas
    suggestionresults = biosuggest.simulate(betas)

    # Label of the column with the max value: My-TRAC's recommendation.
    # It is stored as an extra (fourth) column of the results.
    suggestionresults['Recommendation'] = suggestionresults.idxmax(axis=1)

    first_row = suggestionresults.values[0]
    # print('Trip data = ', trip_data.to_json())
    # print('Results = ',
    #       {'Depart earlier': "{0:.1%}".format(suggestion[0]),
    #        'Depart on-time': "{0:.1%}".format(suggestion[1]),
    #        'Depart later': "{0:.1%}".format(suggestion[2]),
    #        'My-TRAC recommendation': int(suggestion[3])})
    result = {}
    result['tod_earlier'] = first_row[0]
    result['tod_ontime'] = first_row[1]
    result['tod_later'] = first_row[2]
    return result
def prep_db(self):
    """Create the LondonTravel database and one Variable attribute per column.

    The CSV file ``self.file`` located in the ``LondonTravel/`` folder
    under ``self.data_folder`` is read with ``pandas.read_csv``. The
    resulting Biogeme database is stored in ``self.database`` and, for
    every column of its data, an attribute with the column name is set on
    ``self`` holding ``Variable(<column name>)``.
    """
    data = pd.read_csv(self.data_folder + 'LondonTravel/' + self.file)
    self.database = db.Database('LondonTravel', data)

    for col in self.database.data.columns:
        # setattr replaces the former exec() of a string-built statement:
        # column names come from a data file and must never be evaluated
        # as source code (and need not be valid Python identifiers).
        setattr(self, col, Variable(col))
def train_MNL(data):
    """Estimate the "MNL_SGP" multinomial logit model and return its betas.

    ``data`` is modified in place: a ``<mode>_avail`` column equal to 1 is
    added for every mode of the module-level ``modes_list``. The utility
    of each alternative of ``att`` is its constant plus one
    ``variable * beta`` term per variable of ``z_vars`` (skipped for
    'Walk') followed by the variables of ``att[alternative]``. The HTML
    and pickle files written by the estimation are deleted afterwards.

    :param data: estimation data; must contain the ``choice`` column and
        every variable used in the utilities.
    :return: dict mapping each estimated parameter name to its value.
    """
    # All alternatives are declared available for every observation.
    for mode in modes_list:
        data[mode + '_avail'] = 1
    database = db.Database("MNL_SGP", data)

    def utility_variables(alt):
        """Yield, in order, the variable names entering the utility of alt."""
        if alt != 'Walk':
            yield from z_vars
        yield from att[alt]

    def beta_label(var, alt):
        """Name of the coefficient of variable var in alternative alt."""
        return 'B___' + var + '___' + alt

    # Alternative specific constants; the Walk constant is fixed.
    constants = {
        'Walk': bioexp.Beta('B___ASC___Walk', 0, None, None, 1),
        'PT': bioexp.Beta('B___ASC___PT', 0, None, None, 0),
        'RH': bioexp.Beta('B___ASC___RH', 0, None, None, 0),
        'AV': bioexp.Beta('B___ASC___AV', 0, None, None, 0),
        'Drive': bioexp.Beta('B___ASC___Drive', 0, None, None, 0),
    }

    # One Variable per distinct column, one Beta per (variable, alternative).
    variables = {}
    beta_dic = {}
    for alt in att:
        beta_dic[alt] = {}
        for var in utility_variables(alt):
            if var not in variables:
                variables[var] = bioexp.Variable(var)
            label = beta_label(var, alt)
            beta_dic[alt][label] = bioexp.Beta(label, 0, None, None, 0)

    # Utilities start from the constants and are completed term by term.
    V = {key_choice_index[alt]: asc for alt, asc in constants.items()}
    AV = {}
    for alt in att:
        index = key_choice_index[alt]
        AV[index] = bioexp.Variable(alt + '_avail')
        for var in utility_variables(alt):
            V[index] += variables[var] * beta_dic[alt][beta_label(var, alt)]

    CHOICE = bioexp.Variable('choice')
    formulas = {'loglike': bioexp.bioLogLogit(V, AV, CHOICE)}
    biogeme = bio.BIOGEME(database, formulas, numberOfThreads=4)
    biogeme.modelName = "MNL_SGP"
    results = biogeme.estimate()

    # Discard the report files produced by the estimation.
    os.remove("MNL_SGP.html")
    os.remove("MNL_SGP.pickle")

    # Return a plain copy of the estimated values.
    return dict(results.getBetaValues())
def predict_NL_2(betas, biogeme_file, data):
    """Simulate a nested logit model on ``data`` and measure its accuracy.

    ``data`` is modified in place (and returned): availability columns
    ``<mode>_avail`` are set to 1, and the simulated probability columns,
    ``max_prob`` and ``CHOOSE`` are added.

    :param betas: parameter values passed to the simulation.
    :param biogeme_file: mapping with the entries ``'V'``, ``'av'`` and
        ``'nests'`` given to ``biomodels.nested``.
    :param data: data to predict on; must contain the ``choice`` column.
    :return: tuple ``(acc, data_test)`` where ``acc`` is the share of rows
        whose ``CHOOSE`` equals ``choice`` and ``data_test`` is ``data``.
    """
    # All alternatives are declared available for every observation.
    for mode in modes_list:
        data[mode + '_avail'] = 1
    database = db.Database("NL_SGP", data)

    # The choice model is a nested logit: one probability per alternative.
    utilities = biogeme_file['V']
    availability = biogeme_file['av']
    nests = biogeme_file['nests']
    simulate = {
        'prob_' + mode: biomodels.nested(
            utilities, availability, nests, key_choice_index[mode])
        for mode in ('Walk', 'PT', 'RH', 'AV', 'Drive')
    }
    biogeme = bio.BIOGEME(database, simulate)

    # simulatedValues is a pandas data frame with the same number of rows
    # as the database, and as many columns as formulas to simulate.
    simulatedValues = biogeme.simulate(betas)
    prob_list = list(simulatedValues.columns)

    # data_test is the very same object as data, not a copy.
    data_test = data
    for column in prob_list:
        data_test[column] = 0
    data_test.loc[:, prob_list] = simulatedValues.loc[:, prob_list]
    data_test['max_prob'] = data_test[prob_list].max(axis=1)

    # Predicted alternative: the one reaching the maximum probability
    # (on ties, the alternative visited last in key_choice_index wins).
    data_test['CHOOSE'] = 0
    for mode in key_choice_index:
        reaches_max = data_test['prob_' + mode] == data_test['max_prob']
        data_test.loc[reaches_max, 'CHOOSE'] = key_choice_index[mode]

    correct_rows = data_test.loc[data_test['CHOOSE'] == data_test['choice']]
    acc = len(correct_rows) / len(data_test)
    return acc, data_test
def train_MNL(data):
    """Estimate the "MNL_Train" multinomial logit model and return its betas.

    ``data`` is modified in place: a ``<mode>_avail`` column equal to 1 is
    added for every mode of the module-level ``modes_list``. The utility
    of each alternative of ``att`` is its constant plus one
    ``variable * beta`` term per variable of ``att[alternative]``. The
    HTML and pickle files written by the estimation are deleted
    afterwards.

    :param data: estimation data; must contain the ``choice`` column and
        every variable used in the utilities.
    :return: dict mapping each estimated parameter name to its value.
    """
    # All alternatives are declared available for every observation.
    for mode in modes_list:
        data[mode + '_avail'] = 1
    database = db.Database("MNL_Train", data)

    def beta_label(var, alt):
        """Name of the coefficient of variable var in alternative alt."""
        return 'B___' + var + '___' + alt

    # Alternative specific constants; the constant of choice1 is fixed.
    constants = {
        'choice1': bioexp.Beta('B___ASC___choice1', 0, None, None, 1),
        'choice2': bioexp.Beta('B___ASC___choice2', 0, None, None, 0),
    }

    # One Variable per distinct column, one Beta per (variable, alternative).
    variables = {}
    beta_dic = {}
    for alt in att:
        beta_dic[alt] = {}
        for var in att[alt]:
            if var not in variables:
                variables[var] = bioexp.Variable(var)
            label = beta_label(var, alt)
            beta_dic[alt][label] = bioexp.Beta(label, 0, None, None, 0)

    # Utilities start from the constants and are completed term by term.
    V = {key_choice_index[alt]: asc for alt, asc in constants.items()}
    AV = {}
    for alt in att:
        index = key_choice_index[alt]
        AV[index] = bioexp.Variable(alt + '_avail')
        for var in att[alt]:
            V[index] += variables[var] * beta_dic[alt][beta_label(var, alt)]

    CHOICE = bioexp.Variable('choice')
    formulas = {'loglike': bioexp.bioLogLogit(V, AV, CHOICE)}
    biogeme = bio.BIOGEME(database, formulas, numberOfThreads=4)
    biogeme.modelName = "MNL_Train"
    results = biogeme.estimate()

    # Discard the report files produced by the estimation.
    os.remove("MNL_Train.html")
    os.remove("MNL_Train.pickle")

    # Return a plain copy of the estimated values.
    return dict(results.getBetaValues())
def setUp(self):
    """Build the 21-observation 'akiva' database used by the tests.

    The data frame has the columns ``ID`` (1 to 21), ``AutoTime``,
    ``TransitTime`` and ``Choice`` (0/1); the resulting Biogeme database
    is stored in ``self.database``.
    """
    auto_time = [
        52.9, 4.1, 4.1, 56.2, 51.8, 0.2, 27.6, 89.9, 41.5, 95.0, 99.1,
        18.5, 82.0, 8.6, 22.5, 51.4, 81.0, 51.0, 62.2, 95.1, 41.6
    ]
    transit_time = [
        4.4, 28.5, 86.9, 31.6, 20.2, 91.2, 79.7, 2.2, 24.5, 43.5, 8.4,
        84.0, 38.0, 1.6, 74.1, 83.8, 19.2, 85.0, 90.1, 22.2, 91.5
    ]
    choice = [
        1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0
    ]
    columns = {
        'ID': pd.Series(list(range(1, 22))),
        'AutoTime': pd.Series(auto_time),
        'TransitTime': pd.Series(transit_time),
        'Choice': pd.Series(choice),
    }
    self.database = db.Database('akiva', pd.DataFrame(columns))
# structBetas = structResults.getBetaValues() # coef [att] = {} # for var in Variable_name: # var_name = 'coef_' + var + '_' + att # coef[att][var_name] = structBetas[var_name] for att in attitude_name: struc_equ_name = 'struc_equ_' + att data[struc_equ_name] = 0 for var in Variable_name: coef_name = 'coef_' + var + '_' + att data[struc_equ_name] += data[var] * coef[att][coef_name] #scaled #data.to_csv('data_processed.csv',index =False) database = db.Database("stand_along", data) from headers import * # exclude = (Choice == -1.0) # database.remove(exclude) ### Variables # Income_4000_less = DefineVariable('Income_4000_less', (INCOME<=2) + (INCOME==12),database) # Income_12000_more = DefineVariable('Income_12000_more', (INCOME>=7)*(INCOME!=12),database) # age_60_more = DefineVariable('age_60_more',AGE >= Numeric(60),database) # moreThanOneCar = DefineVariable('moreThanOneCar',AUTOOWN > 2,database) # haveLicense = DefineVariable('haveLicense',LICENSE==1,database) # male = DefineVariable('male',SEX == 4,database) # highEducation = DefineVariable('highEducation', EDU >= 5,database) #more than university bechelor # fulltimeJob = DefineVariable('fulltimeJob', JOB == 1,database) #more than university bechelor
def run_estimation_2015_2020():
    """Estimate a binary logit model of telecommuting on 2015 and 2020 data.

    :author: Antonin Danalet, based on the example '01logit.py' by
        Michel Bierlaire, EPFL, on biogeme.epfl.ch

    A binary logit model on the possibility to work from home at least
    some times.

    The function reads ``persons.csv`` (separator ``;``) from
    ``../data/output/data/estimation/2015_2020/``, defines the parameters
    and derived variables, builds the utility of the "telecommuting"
    alternative (multiplied by an estimated scale for 2020 observations),
    estimates the model with Biogeme and writes the results in LaTeX.

    Side effects: the names in ``database.variables`` are injected in the
    module globals; the working directory is temporarily changed to
    ``../data/output/models/estimation/2015_2020/`` so that Biogeme writes
    its files there, and is restored afterwards even if the estimation
    fails.
    """
    # Read the data. 'sep' is given by keyword: pandas 2 no longer accepts
    # it as a positional argument.
    data_file_directory = Path('../data/output/data/estimation/2015_2020/')
    df = pd.read_csv(data_file_directory / 'persons.csv', sep=';')
    database = db.Database('persons', df)

    # The following statement allows you to use the names of the variable
    # as Python variable.
    globals().update(database.variables)

    # Parameters to be estimated. The last argument of Beta is 0 for the
    # parameters left free here and 1 for the others (Biogeme's "fixed"
    # status flag -- confirm against the Biogeme Beta documentation).
    alternative_specific_constant = Beta('alternative_specific_constant', 0, None, None, 0)
    b_no_post_school_education = Beta('b_no_post_school_education', 0, None, None, 0)
    b_secondary_education = Beta('b_secondary_education', 0, None, None, 0)
    b_tertiary_education = Beta('b_tertiary_education', 0, None, None, 0)
    b_male_2020 = Beta('b_male_2020', 0, None, None, 1)
    b_single_household_2020 = Beta('b_single_household_2020', 0, None, None, 1)
    b_couple_without_children_2015 = Beta('b_couple_without_children_2015', 0, None, None, 0)
    b_couple_without_children_2020 = Beta('b_couple_without_children_2020', 0, None, None, 0)
    b_couple_with_children_2020 = Beta('b_couple_with_children_2020', 0, None, None, 1)
    b_single_parent_with_children_2020 = Beta(
        'b_single_parent_with_children_2020', 0, None, None, 1)
    b_not_family_household_2020 = Beta('b_not_family_household_2020', 0, None, None, 1)
    b_public_transport_connection_quality_abc_home_2020 = Beta(
        'b_public_transport_connection_quality_abc_home_2020', 0, None, None, 1)
    b_public_transport_connection_quality_na_home_2015 = Beta(
        'b_public_transport_connection_quality_na_home_2015', 0, None, None, 0)
    b_public_transport_connection_quality_na_home_2020 = Beta(
        'b_public_transport_connection_quality_na_home_2020', 0, None, None, 1)
    b_public_transport_connection_quality_abcd_work_2020 = Beta(
        'b_public_transport_connection_quality_abcd_work_2020', 0, None, None, 1)
    b_urban_home_2020 = Beta('b_urban_home_2020', 0, None, None, 1)
    b_rural_home_2020 = Beta('b_rural_home_2020', 0, None, None, 1)
    b_intermediate_home_2020 = Beta('b_intermediate_home_2020', 0, None, None, 1)
    b_urban_work_2020 = Beta('b_urban_work_2020', 0, None, None, 1)
    b_rural_work_2020 = Beta('b_rural_work_2020', 0, None, None, 1)
    b_intermediate_work_2020 = Beta('b_intermediate_work_2020', 0, None, None, 0)
    b_home_work_distance = Beta('b_home_work_distance', 0, None, None, 0)
    b_home_work_distance_zero = Beta('b_home_work_distance_zero', 0, None, None, 0)
    b_home_work_distance_na = Beta('b_home_work_distance_na', 0, None, None, 0)
    b_business_sector_agriculture_2020 = Beta(
        'b_business_sector_agriculture_2020', 0, None, None, 1)
    b_business_sector_production = Beta('b_business_sector_production', 0, None, None, 0)
    b_business_sector_wholesale = Beta('b_business_sector_wholesale', 0, None, None, 0)
    b_business_sector_retail = Beta('b_business_sector_retail', 0, None, None, 0)
    b_business_sector_gastronomy = Beta('b_business_sector_gastronomy', 0, None, None, 0)
    b_business_sector_finance = Beta('b_business_sector_finance', 0, None, None, 0)
    b_business_sector_services_fc_2020 = Beta(
        'b_business_sector_services_fc_2020', 0, None, None, 1)
    b_business_sector_other_services = Beta('b_business_sector_other_services', 0, None, None, 0)
    b_business_sector_others = Beta('b_business_sector_others', 0, None, None, 0)
    b_business_sector_non_movers = Beta('b_business_sector_non_movers', 0, None, None, 0)
    b_executives = Beta('b_executives', 0, None, None, 0)
    b_german = Beta('b_german', 0, None, None, 0)
    b_hh_income_na = Beta('b_hh_income_na', 0, None, None, 0)
    b_hh_income_8000_or_less = Beta('b_hh_income_8000_or_less', 0, None, None, 0)
    b_owning_a_general_abo = Beta('b_owning_a_general_abo', 0, None, None, 0)
    b_regional_abo_2020 = Beta('b_regional_abo_2020', 0, None, None, 1)
    b_regional_abo_na_2020 = Beta('b_regional_abo_na_2020', 0, None, None, 1)
    b_half_fare_abo_2020 = Beta('b_half_fare_abo_2020', 0, None, None, 1)
    b_half_fare_abo_na_2020 = Beta('b_half_fare_abo_na_2020', 0, None, None, 1)
    b_car_avail_2020 = Beta('b_car_avail_2020', 0, None, None, 1)
    b_car_avail_na_2020 = Beta('b_car_avail_na_2020', 0, None, None, 1)
    b_mobility_resource_na = Beta('b_mobility_resource_na', 0, None, None, 0)
    b_mobility_resource_car_general_abo_2020 = Beta(
        'b_mobility_resource_car_general_abo_2020', 0, None, None, 1)
    b_mobility_resource_car_half_fare_abo = Beta(
        'b_mobility_resource_car_half_fare_abo', 0, None, None, 0)
    b_mobility_resource_car_2020 = Beta('b_mobility_resource_car_2020', 0, None, None, 1)
    # NOTE(review): the label ('..._general_no_car_abo_2020') differs from
    # the Python name ('..._general_abo_no_car_2020'), and this is the only
    # 2020 mobility-resource parameter with status 0. Left unchanged since
    # the label appears in the result files -- confirm it is intended.
    b_mobility_resource_general_abo_no_car_2020 = Beta(
        'b_mobility_resource_general_no_car_abo_2020', 0, None, None, 0)
    b_mobility_resource_half_fare_abo_2020 = Beta(
        'b_mobility_resource_half_fare_abo_2020', 0, None, None, 1)
    b_mobility_resource_none_2020 = Beta('b_mobility_resource_none_2020', 0, None, None, 1)
    b_mobility_resource_car_half_fare_regional_abo_2020 = Beta(
        'b_mobility_resource_car_half_fare_regional_abo_2020', 0, None, None, 1)
    b_mobility_resource_car_regional_abo_2020 = Beta(
        'b_mobility_resource_car_regional_abo_2020', 0, None, None, 1)
    b_mobility_resource_half_fare_regional_abo_2020 = Beta(
        'b_mobility_resource_half_fare_regional_abo_2020', 0, None, None, 1)
    b_mobility_resource_regional_abo_2020 = Beta(
        'b_mobility_resource_regional_abo_2020', 0, None, None, 1)

    # Scale of the 2020 observations: starts at 1, lower bound 0.001.
    scale_2020 = Beta('scale_2020', 1, 0.001, None, 0)

    # Definition of new variables.
    # NOTE(review): a few column labels below differ from the Python names
    # ('male', '..._NA_home_...', '..._abc_work_2020'); they are kept as
    # they are because they are runtime strings.
    male_2020 = DefineVariable('male', (sex == 1) * (year == 2020), database)
    single_household_2020 = DefineVariable('single_household_2020',
                                           (hh_type == 10) * (year == 2020), database)
    couple_without_children_2015 = DefineVariable(
        'couple_without_children_2015', (hh_type == 210) * (year == 2015), database)
    couple_without_children_2020 = DefineVariable(
        'couple_without_children_2020', (hh_type == 210) * (year == 2020), database)
    couple_with_children_2020 = DefineVariable(
        'couple_with_children_2020', (hh_type == 220) * (year == 2020), database)
    single_parent_with_children_2020 = DefineVariable(
        'single_parent_with_children_2020', (hh_type == 230) * (year == 2020), database)
    not_family_household_2020 = DefineVariable(
        'not_family_household_2020', (hh_type == 30) * (year == 2020), database)
    public_transport_connection_quality_abc_home_2020 = DefineVariable(
        'public_transport_connection_quality_abc_home_2020',
        ((public_transport_connection_quality_ARE_home == 1) +
         (public_transport_connection_quality_ARE_home == 2) +
         (public_transport_connection_quality_ARE_home == 3)) * (year == 2020),
        database)
    public_transport_connection_quality_na_home_2015 = DefineVariable(
        'public_transport_connection_quality_NA_home_2015',
        (public_transport_connection_quality_ARE_home == 5) * (year == 2015),
        database)
    public_transport_connection_quality_na_home_2020 = DefineVariable(
        'public_transport_connection_quality_NA_home_2020',
        (public_transport_connection_quality_ARE_home == 5) * (year == 2020),
        database)
    public_transport_connection_quality_abcd_work_2020 = DefineVariable(
        'public_transport_connection_quality_abc_work_2020',
        ((public_transport_connection_quality_ARE_work == 1) +
         (public_transport_connection_quality_ARE_work == 2) +
         (public_transport_connection_quality_ARE_work == 3) +
         (public_transport_connection_quality_ARE_work == 4)) * (year == 2020),
        database)
    urban_home_2020 = DefineVariable(
        'urban_home_2020', (urban_typology_home == 1) * (year == 2020), database)
    rural_home_2020 = DefineVariable(
        'rural_home_2020', (urban_typology_home == 3) * (year == 2020), database)
    intermediate_home_2020 = DefineVariable(
        'intermediate_home_2020', (urban_typology_home == 2) * (year == 2020), database)
    urban_work_2020 = DefineVariable(
        'urban_work_2020', (urban_typology_work == 1) * (year == 2020), database)
    rural_work_2020 = DefineVariable(
        'rural_work_2020', (urban_typology_work == 3) * (year == 2020), database)
    intermediate_work_2020 = DefineVariable(
        'intermediate_work_2020', (urban_typology_work == 2) * (year == 2020), database)
    # Negative distances are zeroed out by the (>= 0.0) factor before the
    # division by 100000.
    home_work_distance = DefineVariable(
        'home_work_distance',
        home_work_crow_fly_distance * (home_work_crow_fly_distance >= 0.0) / 100000.0,
        database)
    home_work_distance_zero = DefineVariable(
        'home_work_distance_zero', home_work_crow_fly_distance == 0.0, database)
    home_work_distance_na = DefineVariable('home_work_distance_na',
                                           home_work_crow_fly_distance == -999, database)
    executives = DefineVariable('executives', work_position == 1, database)
    german = DefineVariable('german', language == 1, database)
    hh_income_na = DefineVariable('hh_income_na', hh_income < 0, database)
    hh_income_8000_or_less = DefineVariable(
        'hh_income_8000_or_less',
        (hh_income == 1) + (hh_income == 2) + (hh_income == 3) + (hh_income == 4),
        database)
    owning_a_general_abo = DefineVariable('owning_a_general_abo', GA_ticket == 1, database)
    regional_abo_2020 = DefineVariable('regional_abo_2020',
                                       (Verbund_Abo == 1) * (year == 2020), database)
    half_fare_abo_2020 = DefineVariable('half_fare_abo_2020',
                                        (halbtax_ticket == 1) * (year == 2020), database)
    car_avail_always_or_on_demand_2020 = DefineVariable(
        'car_avail_always_or_on_demand_2020',
        ((car_avail == 1) + (car_avail == 2)) * (year == 2020), database)
    regional_abo_na_2020 = DefineVariable('regional_abo_na_2020',
                                          (Verbund_Abo < 0) * (year == 2020), database)
    half_fare_abo_na_2020 = DefineVariable(
        'half_fare_abo_na_2020', (halbtax_ticket < 0) * (year == 2020), database)
    car_avail_na_2020 = DefineVariable('car_avail_na_2020',
                                       (car_avail < 0) * (year == 2020), database)
    mobility_resource_na = DefineVariable('mobility_resource_na',
                                          mobility_resources == -98, database)
    mobility_resource_car_general_abo_2020 = DefineVariable(
        'mobility_resource_car_general_abo_2020',
        (mobility_resources == 1) * (year == 2020), database)
    mobility_resource_car_half_fare_abo = DefineVariable(
        'mobility_resource_car_half_fare_abo', mobility_resources == 2, database)
    mobility_resource_car_2020 = DefineVariable(
        'mobility_resource_car_2020',
        (mobility_resources == 3) * (year == 2020), database)
    mobility_resource_general_abo_no_car_2020 = DefineVariable(
        'mobility_resource_general_abo_no_car_2020',
        (mobility_resources == 4) * (year == 2020), database)
    mobility_resource_half_fare_abo_2020 = DefineVariable(
        'mobility_resource_half_fare_abo_2020',
        (mobility_resources == 5) * (year == 2020), database)
    mobility_resource_none_2020 = DefineVariable(
        'mobility_resource_none_2020',
        (mobility_resources == 6) * (year == 2020), database)
    mobility_resource_car_half_fare_regional_abo_2020 = DefineVariable(
        'mobility_resource_car_half_fare_regional_abo_2020',
        (mobility_resources == 20) * (year == 2020), database)
    mobility_resource_car_regional_abo_2020 = DefineVariable(
        'mobility_resource_car_regional_abo_2020',
        (mobility_resources == 30) * (year == 2020), database)
    mobility_resource_half_fare_regional_abo_2020 = DefineVariable(
        'mobility_resource_half_fare_regional_abo_2020',
        (mobility_resources == 50) * (year == 2020), database)
    mobility_resource_regional_abo_2020 = DefineVariable(
        'mobility_resource_regional_abo_2020',
        (mobility_resources == 60) * (year == 2020), database)
    business_sector_agriculture_2020 = DefineVariable(
        'business_sector_agriculture_2020',
        business_sector_agriculture * (year == 2020), database)
    business_sector_services_fc_2020 = DefineVariable(
        'business_sector_services_fc_2020',
        business_sector_services_fc * (year == 2020), database)

    # Utility of telecommuting. The order of the terms is kept unchanged.
    U = (alternative_specific_constant +
         b_executives * executives +
         b_no_post_school_education * no_post_school_educ +
         b_secondary_education * secondary_education +
         b_tertiary_education * tertiary_education +
         b_couple_without_children_2015 * couple_without_children_2015 +
         b_couple_without_children_2020 * couple_without_children_2020 +
         b_public_transport_connection_quality_na_home_2015 * public_transport_connection_quality_na_home_2015 +
         b_public_transport_connection_quality_na_home_2020 * public_transport_connection_quality_na_home_2020 +
         b_home_work_distance * home_work_distance +
         b_home_work_distance_zero * home_work_distance_zero +
         b_home_work_distance_na * home_work_distance_na +
         models.piecewiseFormula(age, [15, 19, 31, 79, 85]) +
         b_business_sector_retail * business_sector_retail +
         b_business_sector_gastronomy * business_sector_gastronomy +
         b_business_sector_finance * business_sector_finance +
         b_business_sector_production * business_sector_production +
         b_business_sector_wholesale * business_sector_wholesale +
         b_business_sector_other_services * business_sector_other_services +
         b_business_sector_others * business_sector_others +
         b_business_sector_non_movers * business_sector_non_movers +
         b_german * german +
         models.piecewiseFormula(work_percentage, [0, 90, 101]) +
         b_hh_income_na * hh_income_na +
         b_hh_income_8000_or_less * hh_income_8000_or_less +
         b_owning_a_general_abo * owning_a_general_abo +
         b_mobility_resource_na * mobility_resource_na +
         b_mobility_resource_car_half_fare_abo * mobility_resource_car_half_fare_abo +
         b_male_2020 * male_2020 +
         b_single_household_2020 * single_household_2020 +
         b_couple_with_children_2020 * couple_with_children_2020 +
         b_single_parent_with_children_2020 * single_parent_with_children_2020 +
         b_not_family_household_2020 * not_family_household_2020 +
         b_public_transport_connection_quality_abc_home_2020 * public_transport_connection_quality_abc_home_2020 +
         b_public_transport_connection_quality_abcd_work_2020 * public_transport_connection_quality_abcd_work_2020 +
         b_urban_home_2020 * urban_home_2020 +
         b_rural_home_2020 * rural_home_2020 +
         b_intermediate_home_2020 * intermediate_home_2020 +
         b_urban_work_2020 * urban_work_2020 +
         b_rural_work_2020 * rural_work_2020 +
         b_intermediate_work_2020 * intermediate_work_2020 +
         b_business_sector_agriculture_2020 * business_sector_agriculture_2020 +
         b_business_sector_services_fc_2020 * business_sector_services_fc_2020 +
         b_regional_abo_2020 * regional_abo_2020 +
         b_regional_abo_na_2020 * regional_abo_na_2020 +
         b_half_fare_abo_2020 * half_fare_abo_2020 +
         b_half_fare_abo_na_2020 * half_fare_abo_na_2020 +
         b_car_avail_2020 * car_avail_always_or_on_demand_2020 +
         b_car_avail_na_2020 * car_avail_na_2020 +
         b_mobility_resource_car_general_abo_2020 * mobility_resource_car_general_abo_2020 +
         b_mobility_resource_car_2020 * mobility_resource_car_2020 +
         b_mobility_resource_general_abo_no_car_2020 * mobility_resource_general_abo_no_car_2020 +
         b_mobility_resource_half_fare_abo_2020 * mobility_resource_half_fare_abo_2020 +
         b_mobility_resource_none_2020 * mobility_resource_none_2020 +
         b_mobility_resource_car_half_fare_regional_abo_2020 * mobility_resource_car_half_fare_regional_abo_2020 +
         b_mobility_resource_car_regional_abo_2020 * mobility_resource_car_regional_abo_2020 +
         b_mobility_resource_half_fare_regional_abo_2020 * mobility_resource_half_fare_regional_abo_2020 +
         b_mobility_resource_regional_abo_2020 * mobility_resource_regional_abo_2020)
    U_no_telecommuting = 0

    # Scale associated with 2020 is estimated (the 2015 scale is 1).
    scale = (year == 2015) + (year == 2020) * scale_2020

    # Associate utility functions with the numbering of alternatives
    V = {
        1: scale * U,  # Yes or sometimes
        0: U_no_telecommuting  # No
    }
    av = {1: 1, 0: 1}

    # Definition of the model. This is the contribution of each
    # observation to the log likelihood function.
    logprob = models.loglogit(
        V,
        av,  # All alternatives are supposed to be always available
        telecommuting)  # Choice variable

    # Change the working directory, so that biogeme writes in the correct
    # folder.
    standard_directory = os.getcwd()
    output_directory = '../data/output/models/estimation/2015_2020/'
    os.chdir(output_directory)
    try:
        # Create the Biogeme object
        biogeme = bio.BIOGEME(database, logprob)
        output_file_name = 'logit_telecommuting_2015_2020'
        biogeme.modelName = output_file_name
        # Estimate the parameters
        results = biogeme.estimate()
        # Get the results in LaTeX
        results.writeLaTeX()
    finally:
        # Go back to the normal working directory, also when the
        # estimation or the export raises.
        os.chdir(standard_directory)
import pandas as pd
import biogeme.version as ver
import biogeme.biogeme as bio
import biogeme.database as db
import biogeme.messaging as msg
import biogeme.expressions as be

# Read the tab-separated Swissmetro data. 'sep' is given by keyword:
# pandas 2 no longer accepts it as a positional argument.
df = pd.read_csv("swissmetro.dat", sep='\t')
database = db.Database("swissmetro", df)

# Removing some observations can be done directly using pandas.
# Dropped rows: PURPOSE different from both 1 and 3, or CHOICE equal to 0.
remove = (((database.data.PURPOSE != 1) &
           (database.data.PURPOSE != 3)) |
          (database.data.CHOICE == 0))
database.data.drop(database.data[remove].index, inplace=True)

# Parameters to be estimated
ASC_CAR = be.Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = be.Beta('ASC_TRAIN', 0, None, None, 0)
ASC_SM = be.Beta('ASC_SM', 0, None, None, 1)
B_TIME = be.Beta('B_TIME', 0, None, None, 0)
B_COST = be.Beta('B_COST', 0, None, None, 0)

# Definition of new variables (expressions only, no column is added):
# the cost is multiplied by the indicator GA == 0.
SM_COST = be.Variable('SM_CO') * (be.Variable('GA') == 0)
TRAIN_COST = be.Variable('TRAIN_CO') * (be.Variable('GA') == 0)

# Definition of new variables: adding columns to the database
CAR_AV_SP = be.DefineVariable('CAR_AV_SP',
                              be.Variable('CAR_AV') * (be.Variable('SP') != 0),
                              database)
TRAIN_AV_SP = be.DefineVariable('TRAIN_AV_SP',
                                be.Variable('TRAIN_AV') * (be.Variable('SP') != 0),
                                database)
TRAIN_TT_SCALED = be.DefineVariable('TRAIN_TT_SCALED',
                                    be.Variable('TRAIN_TT') / 100.0,
                                    database)
TRAIN_COST_SCALED = be.DefineVariable('TRAIN_COST_SCALED',
                                      TRAIN_COST / 100,
                                      database)
SM_TT_SCALED = be.DefineVariable('SM_TT_SCALED',
                                 be.Variable('SM_TT') / 100.0,
                                 database)
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.loglikelihood as ll
import biogeme.models as models
from biogeme.expressions import Beta, DefineVariable, Elem, bioMultSum, Numeric
import unittest

# Read the tab-separated Optima data and wrap it in a Biogeme database.
# NOTE: the module-level name 'pandas' is the data frame, not the library
# (which is imported as 'pd').
pandas = pd.read_csv("optima.dat", sep='\t')
database = db.Database("optima", pandas)

# Make the entries of database.variables usable as module-level names
# (Choice, CalculatedIncome, age, ... below come from here).
globals().update(database.variables)

# Remove the observations where Choice is -1.
exclude = (Choice == -1.0)
database.remove(exclude)

# Piecewise linear definition of income, built with
# models.piecewiseFormula on the income divided by 1000, with the
# thresholds None, 4, 6, 8, 10, None.
# NOTE(review): an earlier note here mentioned the biogeme implementation
# of min and max; neither is used in this part of the script.
ScaledIncome = DefineVariable('ScaledIncome',\
                              CalculatedIncome / 1000,database)
formulaIncome = models.piecewiseFormula(ScaledIncome,
                                        [None, 4, 6, 8, 10, None])

# Indicator variables added as new columns of the database.
age_65_more = DefineVariable('age_65_more', age >= Numeric(65), database)
moreThanOneCar = DefineVariable('moreThanOneCar', NbCar > 1, database)
moreThanOneBike = DefineVariable('moreThanOneBike', NbBicy > 1, database)
individualHouse = DefineVariable('individualHouse',\
                                 HouseType == 1,database)
def setUp(self):
    """Prepare the Swissmetro databases and the reference model "01logit".

    Three databases are created, each from its own read of
    ``swissmetro.dat``:

      - ``self.database``: with a user-defined 'TRIANGULAR' random number
        generator; observations matching ``exclude`` are removed,
      - ``self.paneldatabase``: declared as panel data on ``ID``, with the
        same observations removed,
      - ``self.binarydatabase``: observations matching ``excludebinary``
        are removed.

    ``self.models["01logit"]`` is set to the tuple (database, log
    likelihood expression, -5331.252); the last value is presumably the
    expected final log likelihood -- confirm against the tests using it.
    """
    # Set on the instance: the former bare local 'longMessage = True' had
    # no effect on unittest's failure messages.
    self.longMessage = True
    self.formulas = {}
    self.models = {}

    df = pd.read_table("swissmetro.dat")
    self.database = db.Database("swissmetro", df)

    def theTriangularGenerator(size):
        # Draws from a triangular distribution on [-1, 1] with mode 0.
        return np.random.triangular(-1, 0, 1, size=size)

    myRandomNumberGenerators = {'TRIANGULAR': theTriangularGenerator}
    self.database.setRandomNumberGenerators(myRandomNumberGenerators)

    # Each database gets its own data frame, read again from the file.
    df = pd.read_table("swissmetro.dat")
    self.paneldatabase = db.Database("swissmetro", df)
    self.paneldatabase.panel("ID")

    df = pd.read_table("swissmetro.dat")
    self.binarydatabase = db.Database("swissmetro", df)

    # Exclude some observations
    exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
    self.database.remove(exclude)
    self.paneldatabase.remove(exclude)

    CAR_AV_SP = CAR_AV * (SP != 0)
    TRAIN_AV_SP = TRAIN_AV * (SP != 0)
    # '+' binds tighter than '>': the whole sum is compared with 0.
    excludebinary = ((TRAIN_AV_SP == 0) + (CAR_AV_SP == 0) + (CHOICE == 2) +
                     ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0))) > 0
    self.binarydatabase.remove(excludebinary)

    # Generic definitions
    ASC_CAR = Beta('ASC_CAR', 1, None, None, 0)
    ASC_TRAIN = Beta('ASC_TRAIN', 1, None, None, 0)
    ASC_SM = Beta('ASC_SM', 1, None, None, 1)
    B_TIME = Beta('B_TIME', 1, None, None, 0)
    B_COST = Beta('B_COST', 1, None, None, 0)

    SM_COST = SM_CO * (GA == 0)
    TRAIN_COST = TRAIN_CO * (GA == 0)

    TRAIN_TT_SCALED = TRAIN_TT / 100.0
    TRAIN_COST_SCALED = TRAIN_COST / 100
    SM_TT_SCALED = SM_TT / 100.0
    SM_COST_SCALED = SM_COST / 100
    CAR_TT_SCALED = CAR_TT / 100
    CAR_CO_SCALED = CAR_CO / 100

    av = {1: TRAIN_AV_SP,
          2: SM_AV,
          3: CAR_AV_SP}

    # Not used in the part of the method visible here; kept as is.
    modelNames = []
    V = {}
    loglike = {}

    # 01logit
    V["01logit"] = {
        1: ASC_TRAIN + B_TIME * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED,
        2: ASC_SM + B_TIME * SM_TT_SCALED + B_COST * SM_COST_SCALED,
        3: ASC_CAR + B_TIME * CAR_TT_SCALED + B_COST * CAR_CO_SCALED}
    loglike["01logit"] = bioLogLogit(V["01logit"], av, CHOICE)
    self.models["01logit"] = self.database, loglike["01logit"], -5331.252
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import unittest
from biogeme.expressions import Beta, DefineVariable, log, bioDraws, MonteCarlo, PanelLikelihoodTrajectory

# Read the tab-separated Swissmetro file and declare it as panel data,
# using the column "ID" as identifier.
pandas = pd.read_csv("swissmetro.dat", sep='\t')
database = db.Database("swissmetro", pandas)
database.panel("ID")

# The Pandas data structure is available as database.data. Use all the
# Pandas functions to investigate the database
#print(database.data.describe())

# Publish every entry of database.variables as a module-level name
# (PURPOSE, CHOICE, ... are used below).
globals().update(database.variables)

# Removing some observations can be done directly using pandas.
#remove = (((database.data.PURPOSE != 1) & (database.data.PURPOSE != 3)) | (database.data.CHOICE == 0))
#database.data.drop(database.data[remove].index,inplace=True)

# Here we use the "biogeme" way for backward compatibility
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
database.remove(exclude)

# Model parameters. All start at 0 with no bounds; ASC_SM is the only one
# whose last argument is 1 (presumably "fixed, not estimated" - confirm
# against the Beta documentation).
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
ASC_SM = Beta('ASC_SM', 0, None, None, 1)
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_COST = Beta('B_COST', 0, None, None, 0)
""" import sys import pandas as pd import biogeme.database as db import biogeme.biogeme as bio import biogeme.models as models import biogeme.results as res import biogeme.messaging as msg from biogeme.expressions import Beta, DefineVariable, bioDraws, \ MonteCarlo, Elem, bioNormalCdf, exp, log # Read the data df = pd.read_csv('optima.dat', sep='\t') database = db.Database('optima', df) # The following statement allows you to use the names of the variable # as Python variable. globals().update(database.variables) # Exclude observations such that the chosen alternative is -1 database.remove(Choice == -1.0) # Read the estimates from the previous estimation, and use # them as starting values try: results = res.bioResults(pickleFile='05latentChoiceFull.pickle') except FileNotFoundError: print( 'Run first the script 05latentChoiceFull.py in order to generate the file '
""" # pylint: disable=invalid-name, undefined-variable import pandas as pd import biogeme.database as db import biogeme.biogeme as bio import biogeme.draws as draws from biogeme.expressions import exp, bioDraws, MonteCarlo # We create a fake database with one entry, as it is required # to store the draws pandas = pd.DataFrame() pandas['FakeColumn'] = [1.0] database = db.Database('fakeDatabase', pandas) def halton13(sampleSize, numberOfDraws): """ The user can define new draws. For example, Halton draws with base 13, skipping the first 10 draws. """ return draws.getHaltonDraws(sampleSize, numberOfDraws, base=13, skip=10) mydraws = {'HALTON13': (halton13, 'Halton draws, base 13, skipping 10')} database.setRandomNumberGenerators(mydraws) integrand = exp(bioDraws('U', 'UNIFORM')) simulatedI = MonteCarlo(integrand)
data_folder = "../data/"

# Number of draws and number of cores: taken from the command line when
# exactly two arguments are given, otherwise defaults are used.
if len(sys.argv) == 3:  # If CLI arguments provided
    n_draws, n_cores = int(sys.argv[1]), int(sys.argv[2])
else:  # Default in case CLI arguments not provided
    n_draws, n_cores = 500, 2

df = pd.read_csv(data_folder + "artificial_wide.csv")
# The choice column is converted to strings so that it can be recoded
# with the string-keyed mapping below.
df['choice'] = df['choice'].astype('str')
mapping = {'1': 1, '2': 2, '3': 3}
# One column of ones, named aval_<key>, per key of the mapping.
for k, v in mapping.items():
    df["aval_" + k] = np.ones(df.shape[0])

start_time = time()
df = df.replace({'choice': mapping})
database = db.Database('artificial', df)
# Publish every entry of database.variables as a module-level name.
globals().update(database.variables)

# Fixed params
b_price = Beta('b_price', 0, None, None, 0)
b_time = Beta('b_time', 0, None, None, 0)
b_conven = Beta('b_conven', 0, None, None, 0)
b_comfort = Beta('b_comfort', 0, None, None, 0)
b_nonsig1 = Beta('b_nonsig1', 0, None, None, 0)
b_nonsig2 = Beta('b_nonsig2', 0, None, None, 0)
b_nonsig3 = Beta('b_nonsig3', 0, None, None, 0)

# Random params
u_meals = Beta('u_meals', 0, None, None, 0)
u_petfr = Beta('u_petfr', 0, None, None, 0)
def estimate_model(self, pandas_df_for_specified_country, country):
    '''
    Estimate the mode choice logit model of one country.

    Note that the occupation dummies ``OCC_1`` ... ``OCC_6`` are added as
    new columns to the data frame received as argument.

    :param pandas_df_for_specified_country: observations of the country.
        It must contain the column ``user_occupation`` and the columns
        referenced by the utility functions below.
    :param country: country code; one of 'GR', 'ES', 'NL', 'PT'.
    :return: The estimated model, in a variable with 3 attributes:
        betas, structure, results.
    :raise ValueError: if there is no model specification for ``country``.
    '''
    # Fail early and explicitly. Previously an unknown country led to
    # V = 1 and av = 1 being passed to bioLogLogit, which expects
    # dictionaries.
    if country not in ('GR', 'ES', 'NL', 'PT'):
        raise ValueError(
            'There is no model specification for {}'.format(country))

    mypanda = pandas_df_for_specified_country
    # One 0/1 dummy per occupation category
    for i in range(1, 7):
        mypanda['OCC_' + str(i)] = np.where(
            pandas_df_for_specified_country['user_occupation'] == i, 1, 0)

    # create the respective database (needed for biogeme)
    estimationdb = db.Database('estimationdb', mypanda)

    print('Training Mode Choice model for', country)

    # Alternative Specific Constants
    ASC_CAR = Beta('ASC_CAR', 0, None, None, 1)  # This ASC remains equal to zero
    ASC_PT = Beta('ASC_PT', 0, None, None, 0)
    ASC_MOT = Beta('ASC_MOT', 0, None, None, 0)
    ASC_BIKE = Beta('ASC_BIKE', 0, None, None, 0)

    # Beta variables (i.e. coefficients) - alternative specific
    BETA_TIME = Beta('BETA_TIME', 0, None, None, 0)  # Travel Time
    BETA_COST = Beta('BETA_COST', 0, None, None, 0)  # Travel Cost
    BETA_S = Beta('BETA_S', 0, None, None, 0)  # Comfort

    # Beta variables (i.e. coefficients) - traveller
    BETA_AGE_PT = Beta('BETA_AGE_PT', 0, None, None, 0)  # Age
    BETA_NCAR_PT = Beta('BETA_NCAR_PT', 0, None, None, 0)  # Number of trips by car
    BETA_NPT_PT = Beta('BETA_NPT_PT', 0, None, None, 0)  # Number of trips by pt
    BETA_GENDER_PT = Beta('BETA_GENDER_PT', 0, None, None, 0)  # Gender
    BETA_SCOPE_PT = Beta('BETA_SCOPE_PT', 0, None, None, 0)  # Trip Purpose
    BETA_OCC_1_PT = Beta('BETA_OCC_1_PT', 0, None, None, 0)  # 1:Private employee
    BETA_OCC_2_PT = Beta('BETA_OCC_2_PT', 0, None, None, 0)  # 2:Public servant
    BETA_OCC_3_PT = Beta('BETA_OCC_3_PT', 0, None, None, 0)  # 3:Self-employed
    BETA_OCC_5_PT = Beta('BETA_OCC_5_PT', 0, None, None, 0)  # 5:Retired
    BETA_OCC_6_PT = Beta('BETA_OCC_6_PT', 0, None, None, 0)  # 6:Unemployed

    BETA_AGE_BIKE = Beta('BETA_AGE_BIKE', 0, None, None, 0)  # Age
    BETA_NCAR_BIKE = Beta('BETA_NCAR_BIKE', 0, None, None, 0)  # Number of trips by car
    BETA_NPT_BIKE = Beta('BETA_NPT_BIKE', 0, None, None, 0)  # Number of trips by pt
    BETA_OCC_1_BIKE = Beta('BETA_OCC_1_BIKE', 0, None, None, 0)  # 1:Private employee
    BETA_OCC_3_BIKE = Beta('BETA_OCC_3_BIKE', 0, None, None, 0)  # 3:Self-employed
    BETA_OCC_4_BIKE = Beta('BETA_OCC_4_BIKE', 0, None, None, 0)  # 4:Student
    BETA_OCC_5_BIKE = Beta('BETA_OCC_5_BIKE', 0, None, None, 0)  # 5:Retired
    BETA_OCC_6_BIKE = Beta('BETA_OCC_6_BIKE', 0, None, None, 0)  # 6:Unemployed

    BETA_AGE_MOT = Beta('BETA_AGE_MOT', 0, None, None, 0)  # Age
    BETA_GENDER_MOT = Beta('BETA_GENDER_MOT', 0, None, None, 0)  # Gender
    BETA_SCOPE_MOT = Beta('BETA_SCOPE_MOT', 0, None, None, 0)  # Scope
    BETA_NCAR_MOT = Beta('BETA_NCAR_MOT', 0, None, None, 0)  # Number of trips by car
    BETA_NPT_MOT = Beta('BETA_NPT_MOT', 0, None, None, 0)  # Number of trips by pt
    BETA_OCC_2_MOT = Beta('BETA_OCC_2_MOT', 0, None, None, 0)  # 2:Public servant
    BETA_OCC_3_MOT = Beta('BETA_OCC_3_MOT', 0, None, None, 0)  # 3:Self-employed
    BETA_OCC_5_MOT = Beta('BETA_OCC_5_MOT', 0, None, None, 0)  # 5:Retired
    BETA_OCC_6_MOT = Beta('BETA_OCC_6_MOT', 0, None, None, 0)  # 6:Unemployed

    # Variables of the database.
    # In the training dataset, both moto and bike are under the moto
    # variable, hence the *_bike variables point to the *_moto columns.
    trip_comfort_car = Variable('trip_comfort_car')
    trip_comfort_moto = Variable('trip_comfort_moto')
    trip_comfort_bike = Variable('trip_comfort_moto')
    trip_comfort_pt = Variable('trip_comfort_pt')
    trip_cost_car = Variable('trip_cost_car')
    trip_cost_moto = Variable('trip_cost_moto')
    trip_cost_bike = Variable('trip_cost_moto')
    trip_cost_pt = Variable('trip_cost_pt')
    trip_dur_car = Variable('trip_dur_car')
    trip_dur_moto = Variable('trip_dur_moto')
    trip_dur_bike = Variable('trip_dur_moto')
    trip_dur_pt = Variable('trip_dur_pt')
    trip_purpose = Variable('trip_purpose')
    AGE = Variable('AGE')
    user_gender = Variable('user_gender')
    user_trips_car = Variable('user_trips_car')
    user_trips_pt = Variable('user_trips_pt')
    OCC_1 = Variable('OCC_1')  # 1:Private employee
    OCC_2 = Variable('OCC_2')  # 2:Public servant
    OCC_3 = Variable('OCC_3')  # 3:Self-employed
    OCC_4 = Variable('OCC_4')  # 4:Student
    OCC_5 = Variable('OCC_5')  # 5:Retired
    OCC_6 = Variable('OCC_6')  # 6:Unemployed
    user_choice = Variable('user_choice')
    user_car_avail = Variable('user_car_avail')
    user_moto_avail = Variable('user_moto_avail')
    user_bike_avail = Variable('user_bike_avail')

    if country == 'GR' or country == 'ES':  # FIXME create a separate model for ES
        ### Definition of utility functions - one for each alternative:
        V_CAR = (ASC_CAR
                 + BETA_TIME * trip_dur_car
                 + BETA_S * trip_comfort_car)

        V_PT = (ASC_PT
                + BETA_TIME * trip_dur_pt
                + BETA_S * trip_comfort_pt
                + BETA_SCOPE_PT * trip_purpose
                + BETA_AGE_PT * AGE
                + BETA_GENDER_PT * user_gender
                + BETA_NCAR_PT * user_trips_car
                + BETA_NPT_PT * user_trips_pt
                + BETA_OCC_2_PT * OCC_2
                + BETA_OCC_5_PT * OCC_5)

        V_MOT = (ASC_MOT
                 + BETA_TIME * trip_dur_moto
                 + BETA_S * trip_comfort_moto
                 + BETA_SCOPE_MOT * trip_purpose
                 + BETA_AGE_MOT * AGE
                 + BETA_GENDER_MOT * user_gender
                 + BETA_NCAR_MOT * user_trips_car
                 + BETA_NPT_MOT * user_trips_pt
                 + BETA_OCC_3_MOT * OCC_3
                 + BETA_OCC_6_MOT * OCC_6)

        # Associate the availability conditions with the alternatives.
        # (Does not really apply on ToD modelling)
        av = {1: user_car_avail,
              2: 1,
              3: user_moto_avail}

        # Associate utility functions with the numbering of alternatives
        V = {1: V_CAR,
             2: V_PT,
             3: V_MOT}

    elif country == 'NL':
        ### Definition of utility functions - one for each alternative:
        V_CAR = (ASC_CAR
                 + BETA_COST * trip_cost_car
                 + BETA_TIME * trip_dur_car
                 + BETA_S * trip_comfort_car)

        V_PT = (ASC_PT
                + BETA_COST * trip_cost_pt
                + BETA_TIME * trip_dur_pt
                + BETA_S * trip_comfort_pt
                + BETA_AGE_PT * AGE
                + BETA_NCAR_PT * user_trips_car
                + BETA_NPT_PT * user_trips_pt
                + BETA_OCC_1_PT * OCC_1
                + BETA_OCC_3_PT * OCC_3
                + BETA_OCC_5_PT * OCC_5
                + BETA_OCC_6_PT * OCC_6)

        V_BIKE = (ASC_BIKE
                  + BETA_COST * trip_cost_bike
                  + BETA_TIME * trip_dur_bike
                  + BETA_S * trip_comfort_bike
                  + BETA_AGE_BIKE * AGE
                  + BETA_NCAR_BIKE * user_trips_car
                  + BETA_NPT_BIKE * user_trips_pt
                  + BETA_OCC_1_BIKE * OCC_1
                  + BETA_OCC_3_BIKE * OCC_3
                  + BETA_OCC_4_BIKE * OCC_4
                  + BETA_OCC_5_BIKE * OCC_5
                  + BETA_OCC_6_BIKE * OCC_6)

        # Associate the availability conditions with the alternatives.
        # (Does not really apply on ToD modelling)
        av = {1: user_car_avail,
              2: 1,
              3: user_bike_avail}

        # Associate utility functions with the numbering of alternatives
        V = {1: V_CAR,
             2: V_PT,
             3: V_BIKE}

    else:  # country == 'PT', guaranteed by the check at the top
        ### Definition of utility functions - one for each alternative:
        V_CAR = (ASC_CAR
                 + BETA_TIME * trip_dur_car
                 + BETA_COST * trip_cost_car)

        V_PT = (ASC_PT
                + BETA_TIME * trip_dur_pt
                + BETA_COST * trip_cost_pt
                + BETA_NCAR_PT * user_trips_car
                + BETA_NPT_PT * user_trips_pt
                + BETA_OCC_3_PT * OCC_3)

        V_MOT = (ASC_MOT
                 + BETA_TIME * trip_dur_moto
                 + BETA_COST * trip_cost_moto
                 + BETA_AGE_MOT * AGE
                 + BETA_NCAR_MOT * user_trips_car
                 + BETA_NPT_MOT * user_trips_pt
                 + BETA_OCC_2_MOT * OCC_2
                 + BETA_OCC_3_MOT * OCC_3
                 + BETA_OCC_5_MOT * OCC_5)

        # Associate the availability conditions with the alternatives.
        # (Does not really apply on ToD modelling)
        av = {1: user_car_avail,
              2: 1,
              3: user_moto_avail}

        # Associate utility functions with the numbering of alternatives
        V = {1: V_CAR,
             2: V_PT,
             3: V_MOT}

    # The choice model is a log logit, with availability conditions
    logprob = bioLogLogit(util=V, av=av, choice=user_choice)
    biogeme = bio.BIOGEME(database=estimationdb, formulas=logprob)
    biogeme.modelName = "logitEstimation"

    # Create the outputs of the estimation and store in a namedtuple (= Model)
    results = biogeme.estimate()
    betas = results.getBetaValues()  # To be used later for the simulation of the model
    structure = {1: models.logit(V, av, 1),
                 2: models.logit(V, av, 2),
                 3: models.logit(V, av, 3)}
    Output = collections.namedtuple('Output', ['betas', 'structure', 'results'])
    Model = Output(betas, structure, results)

    self.__cleanup_after_model_training()
    # print(self.evaluate_model(pandas_df_for_specified_country, Model))

    return Model
def apply_model_to_example(df_persons, betas, output_directory_for_simulation, output_file_name):
    """Simulate the telecommuting binary logit model and save the result.

    Two alternatives: work from home at least some times (1), or not (0).
    The simulated probabilities are appended as columns to the person
    table, which is then written as a comma-separated file.

    :author: Antonin Danalet, based on the example '01logit_simul.py'
        by Michel Bierlaire, EPFL, on biogeme.epfl.ch

    :param df_persons: person table; must contain the columns referenced
        below (highest_educ, sex, noga_08, nation, hh_income, ...).
    :param betas: parameter values passed to ``biogeme.simulate``.
    :param output_directory_for_simulation: directory of the output file;
        it is combined with the file name using ``/``.
    :param output_file_name: name of the CSV file to write.
    """
    # Read the data
    database = db.Database('persons', df_persons)
    # The following statement allows you to use the names of the variable as Python variable.
    globals().update(database.variables)

    def noga_between(low, high):
        """Indicator that noga_08 lies in the closed interval [low, high].

        A chained comparison ``low <= noga_08 <= high`` must not be used
        on Biogeme expressions: Python expands it with ``and``, which
        does not combine the two comparison expressions. The product of
        the two indicators is used instead.
        """
        return (noga_08 >= low) * (noga_08 <= high)

    # Parameters to be estimated
    alternative_specific_constant = Beta('alternative_specific_constant', 0, None, None, 0)

    b_no_post_school_education = Beta('b_no_post_school_education', 0, None, None, 0)
    b_secondary_education = Beta('b_secondary_education', 0, None, None, 0)
    b_tertiary_education = Beta('b_tertiary_education', 0, None, None, 0)
    b_university = Beta('b_university', 0, None, None, 1)

    b_male = Beta('b_male', 0, None, None, 0)

    b_public_transport_connection_quality_are_a_home = Beta(
        'b_public_transport_connection_quality_are_a_home', 0, None, None, 1)
    b_public_transport_connection_quality_are_b_home = Beta(
        'b_public_transport_connection_quality_are_b_home', 0, None, None, 1)
    b_public_transport_connection_quality_are_c_home = Beta(
        'b_public_transport_connection_quality_are_c_home', 0, None, None, 1)
    b_public_transport_connection_quality_are_d_home = Beta(
        'b_public_transport_connection_quality_are_d_home', 0, None, None, 1)
    b_public_transport_connection_quality_are_na_home = Beta(
        'b_public_transport_connection_quality_are_na_home', 0, None, None, 0)

    b_urban_work = Beta('b_urban_work', 0, None, None, 1)
    b_rural_work = Beta('b_rural_work', 0, None, None, 0)
    b_intermediate_work = Beta('b_intermediate_work', 0, None, None, 1)

    b_home_work_distance = Beta('b_home_work_distance', 0, None, None, 0)

    b_business_sector_agriculture = Beta('b_business_sector_agriculture', 0, None, None, 0)
    b_business_sector_production = Beta('b_business_sector_production', 0, None, None, 0)
    b_business_sector_wholesale = Beta('b_business_sector_wholesale', 0, None, None, 1)
    b_business_sector_retail = Beta('b_business_sector_retail', 0, None, None, 0)
    b_business_sector_gastronomy = Beta('b_business_sector_gastronomy', 0, None, None, 0)
    b_business_sector_finance = Beta('b_business_sector_finance', 0, None, None, 1)
    b_business_sector_services_fc = Beta('b_business_sector_services_fc', 0, None, None, 0)
    b_business_sector_other_services = Beta('b_business_sector_other_services', 0, None, None, 1)
    b_business_sector_others = Beta('b_business_sector_others', 0, None, None, 1)
    b_business_sector_non_movers = Beta('b_business_sector_non_movers', 0, None, None, 0)
    b_employees = Beta('b_employees', 0, None, None, 1)
    b_executives = Beta('b_executives', 0, None, None, 0)
    b_german = Beta('b_german', 0, None, None, 0)
    b_nationality_ch_germany_france_italy_nw_e = Beta(
        'b_nationality_ch_germany_france_italy_nw_e', 0, None, None, 1)
    b_nationality_south_west_europe = Beta('b_nationality_south_west_europe', 0, None, None, 1)
    b_nationality_southeast_europe = Beta('b_nationality_southeast_europe', 0, None, None, 1)
    b_hh_income_na = Beta('B_hh_income_na', 0, None, None, 1)
    b_hh_income_8000_or_less = Beta('b_hh_income_8000_or_less', 0, None, None, 0)
    b_hh_income_more_than_8000 = Beta('b_hh_income_more_than_8000', 0, None, None, 1)

    # Definition of new variables
    no_post_school_educ = ((highest_educ == 1) | (highest_educ == 2) |
                           (highest_educ == 3) | (highest_educ == 4))
    secondary_education = ((highest_educ == 5) | (highest_educ == 6) |
                           (highest_educ == 7) | (highest_educ == 8) |
                           (highest_educ == 9) | (highest_educ == 10) |
                           (highest_educ == 11) | (highest_educ == 12))
    tertiary_education = ((highest_educ == 13) | (highest_educ == 14) |
                          (highest_educ == 15) | (highest_educ == 16))
    university = (highest_educ == 17)

    male = (sex == 1)

    public_transport_connection_quality_ARE_A_home = (
        public_transport_connection_quality_ARE_home == 1)
    public_transport_connection_quality_ARE_B_home = (
        public_transport_connection_quality_ARE_home == 2)
    public_transport_connection_quality_ARE_C_home = (
        public_transport_connection_quality_ARE_home == 3)
    public_transport_connection_quality_ARE_D_home = (
        public_transport_connection_quality_ARE_home == 4)
    public_transport_connection_quality_ARE_NA_home = (
        public_transport_connection_quality_ARE_home == 5)

    urban_work = (urban_typology_work == 1)
    rural_work = (urban_typology_work == 3)
    intermediate_work = (urban_typology_work == 2)

    # Negative distances are set to zero by the indicator factor.
    home_work_distance = (home_work_crow_fly_distance *
                          (home_work_crow_fly_distance >= 0.0) / 100000.0)

    # Business sectors, defined from the value of noga_08
    business_sector_agriculture = DefineVariable(
        'business_sector_agriculture', noga_between(1, 7), database)
    business_sector_retail = DefineVariable(
        'business_sector_retail', noga_08 == 47, database)
    business_sector_gastronomy = DefineVariable(
        'business_sector_gastronomy', noga_between(55, 57), database)
    business_sector_finance = DefineVariable(
        'business_sector_finance', noga_between(64, 67), database)
    business_sector_production = DefineVariable(
        'business_sector_production',
        noga_between(10, 35) | noga_between(40, 44), database)
    business_sector_wholesale = DefineVariable(
        'business_sector_wholesale',
        (noga_08 == 45) | noga_between(49, 54), database)
    business_sector_services_fC = DefineVariable(
        'business_sector_services_fC',
        noga_between(60, 63) | noga_between(69, 83) | (noga_08 == 58),
        database)
    business_sector_other_services = DefineVariable(
        'business_sector_other_services',
        noga_between(86, 90) | noga_between(92, 96) | (noga_08 == 59) |
        (noga_08 == 68), database)
    business_sector_others = DefineVariable(
        'business_sector_others', noga_between(97, 98), database)
    business_sector_non_movers = DefineVariable(
        'business_sector_non_movers',
        noga_between(8, 9) | noga_between(36, 39) | noga_between(84, 85) |
        (noga_08 == 91) | (noga_08 == 99), database)

    employees = work_position == 2
    executives = work_position == 1

    german = language == 1

    nationality_switzerland = nation == 8100
    nationality_germany_austria_lichtenstein = (
        (nation == 8207) + (nation == 8229) + (nation == 8222))
    nationality_italy_vatican = (nation == 8218) + (nation == 8241)
    nationality_france_monaco_san_marino = (
        (nation == 8212) + (nation == 8226) + (nation == 8233))
    nationality_northwestern_europe = (
        (nation == 8204) + (nation == 8223) + (nation == 8227) +
        (nation == 8206) + (nation == 8211) + (nation == 8215) +
        (nation == 8216) + (nation == 8217) + (nation == 8228) +
        (nation == 8234))
    nationality_south_west_europe = (
        (nation == 8231) + (nation == 8236) + (nation == 8202))
    nationality_southeast_europe = (
        (nation == 8224) + (nation == 8201) + (nation == 8214) +
        (nation == 8256) + (nation == 8250) + (nation == 8251) +
        (nation == 8252) + (nation == 8255) + (nation == 8205) +
        (nation == 8239) + (nation == 8242) + (nation == 8248) +
        (nation == 8254))
    nationality_eastern_europe = (
        (nation == 8230) + (nation == 8232) + (nation == 8240) +
        (nation == 8243) + (nation == 8244) + (nation == 8263) +
        (nation == 8265) + (nation == 8266) + (nation == 8260) +
        (nation == 8261) + (nation == 8262))

    # several_part_time_jobs = full_part_time_job == 3
    # Work percentage, capped at 100. The second part-time job is
    # deliberately left out of the sum.
    work_percentage = DefineVariable(
        'work_percentage',
        bioMin(
            (full_part_time_job == 1) * 100 +
            percentage_first_part_time_job *
            (percentage_first_part_time_job > 0),
            100),
        database)

    hh_income_na = hh_income == -98
    hh_income_less_than_2000 = hh_income == 1
    hh_income_2000_to_4000 = hh_income == 2
    hh_income_4001_to_6000 = hh_income == 3
    hh_income_6001_to_8000 = hh_income == 4
    hh_income_8001_to_10000 = hh_income == 5
    hh_income_10001_to_12000 = hh_income == 6
    hh_income_12001_to_14000 = hh_income == 7
    hh_income_14001_to_16000 = hh_income == 8
    hh_income_more_than_16000 = hh_income == 9

    # Utility
    U = (alternative_specific_constant
         + b_executives * executives
         + b_employees * employees
         + b_no_post_school_education * no_post_school_educ
         + b_secondary_education * secondary_education
         + b_tertiary_education * tertiary_education
         + b_university * university
         + b_male * male
         + b_public_transport_connection_quality_are_a_home * public_transport_connection_quality_ARE_A_home
         + b_public_transport_connection_quality_are_b_home * public_transport_connection_quality_ARE_B_home
         + b_public_transport_connection_quality_are_c_home * public_transport_connection_quality_ARE_C_home
         + b_public_transport_connection_quality_are_d_home * public_transport_connection_quality_ARE_D_home
         + b_public_transport_connection_quality_are_na_home * public_transport_connection_quality_ARE_NA_home
         + b_urban_work * urban_work
         + b_rural_work * rural_work
         + b_intermediate_work * intermediate_work
         + b_home_work_distance * home_work_distance
         + models.piecewiseFormula(age, [0, 20, 35, 75, 200])
         + b_business_sector_agriculture * business_sector_agriculture
         + b_business_sector_retail * business_sector_retail
         + b_business_sector_gastronomy * business_sector_gastronomy
         + b_business_sector_finance * business_sector_finance
         + b_business_sector_production * business_sector_production
         + b_business_sector_wholesale * business_sector_wholesale
         + b_business_sector_services_fc * business_sector_services_fC
         + b_business_sector_other_services * business_sector_other_services
         + b_business_sector_others * business_sector_others
         + b_business_sector_non_movers * business_sector_non_movers
         + b_german * german
         + b_nationality_ch_germany_france_italy_nw_e * nationality_switzerland
         + b_nationality_ch_germany_france_italy_nw_e * nationality_germany_austria_lichtenstein
         + b_nationality_ch_germany_france_italy_nw_e * nationality_italy_vatican
         + b_nationality_ch_germany_france_italy_nw_e * nationality_france_monaco_san_marino
         + b_nationality_ch_germany_france_italy_nw_e * nationality_northwestern_europe
         + b_nationality_south_west_europe * nationality_south_west_europe
         + b_nationality_southeast_europe * nationality_southeast_europe
         + b_nationality_ch_germany_france_italy_nw_e * nationality_eastern_europe
         + models.piecewiseFormula(work_percentage, [0, 90, 101])
         + b_hh_income_na * hh_income_na
         + b_hh_income_8000_or_less * hh_income_less_than_2000
         + b_hh_income_8000_or_less * hh_income_2000_to_4000
         + b_hh_income_8000_or_less * hh_income_4001_to_6000
         + b_hh_income_8000_or_less * hh_income_6001_to_8000
         + b_hh_income_more_than_8000 * hh_income_8001_to_10000
         + b_hh_income_more_than_8000 * hh_income_10001_to_12000
         + b_hh_income_more_than_8000 * hh_income_12001_to_14000
         + b_hh_income_more_than_8000 * hh_income_14001_to_16000
         + b_hh_income_more_than_8000 * hh_income_more_than_16000)
    U_No_telecommuting = 0

    # Associate utility functions with the numbering of alternatives
    V = {
        1: U,  # Yes or sometimes
        0: U_No_telecommuting  # No
    }

    # Both alternatives are always available
    av = {1: 1, 0: 1}

    # The choice model is a logit, with availability conditions
    prob_telecommuting = models.logit(V, av, 1)
    prob_no_telecommuting = models.logit(V, av, 0)

    simulate = {
        'Prob. telecommuting': prob_telecommuting,
        'Prob. no telecommuting': prob_no_telecommuting
    }

    # Create the Biogeme object
    biogeme = bio.BIOGEME(database, simulate)
    biogeme.modelName = 'logit_telecommuting_simul'

    # Change the working directory, so that biogeme writes in the correct folder, i.e., where this file is
    # standard_directory = os.getcwd()
    # os.chdir(output_directory_for_simulation)

    results = biogeme.simulate(theBetaValues=betas)
    # print(results.describe())
    df_persons = pd.concat([df_persons, results], axis=1)

    # Go back to the normal working directory
    # os.chdir(standard_directory)

    # Save the file
    df_persons.to_csv(output_directory_for_simulation / output_file_name,
                      sep=',', index=False)
:author: Michel Bierlaire, EPFL :date: Wed Sep 11 08:22:28 2019 """ import pandas as pd import numpy as np import biogeme.database as db import biogeme.biogeme as bio import biogeme.models as models import biogeme.distributions as dist import biogeme.results as res from biogeme.expressions import Beta, DefineVariable, Elem, Integrate, Numeric, RandomVariable, bioNormalCdf, exp, log # Read the data df = pd.read_csv("optima.dat", sep='\t') database = db.Database("optima", df) # The following statement allows you to use the names of the variable # as Python variable. globals().update(database.variables) # Exclude observations such that the chosen alternative is -1 exclude = (Choice == -1.0) database.remove(exclude) ### Variables ScaledIncome = DefineVariable('ScaledIncome',\ CalculatedIncome / 1000,database) thresholds = [0, 4, 6, 8, 10, None] ContIncome = models.piecewiseVariables(ScaledIncome, thresholds)
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.messaging as msg
from biogeme.expressions import Beta, DefineVariable, bioDraws, \
    PanelLikelihoodTrajectory, MonteCarlo, log

# Read the data
# The separator is passed by keyword: recent versions of pandas accept only
# the file name as positional argument of read_csv.
df = pd.read_csv('outside.dat', sep='\t')
database = db.Database('outside', df)

# They are organized as panel data. The variable ID identifies each individual.
database.panel("ID")

# Publish every entry of database.variables as a module-level name.
globals().update(database.variables)

# Parameters to be estimated
ASC_1 = Beta('ASC_1', 0, None, None, 1)
ASC_11 = Beta('ASC_11', 0, None, None, 0)
ASC_2 = Beta('ASC_2', 0, None, None, 0)
ASC_21 = Beta('ASC_21', 0, None, None, 0)
ASC_3 = Beta('ASC_3', 0, None, None, 0)
ASC_31 = Beta('ASC_31', 0, None, None, 0)
ASC_4 = Beta('ASC_4', 0, None, None, 0)

# Shared error parameters, fix the mean-parameter to 0
SIGMA_SH_MAAS_M = Beta('SIGMA_SH_MAAS_M', 0, None, None, 1)
SIGMA_SH_MAAS_STD = Beta('SIGMA_SH_MAAS_STD', 0, None, None, 0)
SIGMA_SH_MAASRND = SIGMA_SH_MAAS_M + SIGMA_SH_MAAS_STD * bioDraws(
    'SIGMA_SH_MAASRND', 'NORMAL')

beta_fam_package = Beta('beta_fam_package', 0, None, None, 0)
beta_fam_private = Beta('beta_fam_private', 0, None, None, 0)
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
import biogeme.models as models
import biogeme.messaging as msg
from biogeme.expressions import Beta, DefineVariable

# Read the data
# The separator is passed by keyword: recent versions of pandas accept only
# the file name as positional argument of read_csv.
df = pd.read_csv('comboall.dat', sep='\t')
database = db.Database('comboall', df)

# The Pandas data structure is available as database.data. Use all the
# Pandas functions to investigate the database
#print(database.data.describe())

# The following statement allows you to use the names of the variable
# as Python variable.
globals().update(database.variables)

# Removing some observations can be done directly using pandas.
#remove = (((database.data.PURPOSE != 1) &
#           (database.data.PURPOSE != 3)) |
#          (database.data.CHOICE == 0))
#database.data.drop(database.data[remove].index,inplace=True)

# Parameters to be estimated
ASC_1 = Beta('ASC_1', 0, None, None, 0)
ASC_11 = Beta('ASC_11', 0, None, None, 0)
ASC_2 = Beta('ASC_2', 0, None, None, 0)
ASC_21 = Beta('ASC_21', 0, None, None, 0)
ASC_3 = Beta('ASC_3', 0, None, None, 0)
# Translated to .py by Meritxell Pacheco # 2017 # Adapted to PandasBiogeme by Michel Bierlaire # Sun Oct 21 23:18:39 2018 # Revised by Nicola Ortelli # Sept 2020 import pandas as pd import biogeme.database as db import biogeme.biogeme as bio from biogeme.expressions import Beta, DefineVariable, log from biogeme.models import loglogit pandas = pd.read_csv("airline.dat", sep='\t') database = db.Database("airline", pandas) pd.options.display.float_format = '{:.3g}'.format globals().update(database.variables) # Exclude exclude = (ArrivalTimeHours_1 == -1) database.remove(exclude) # Choice chosenAlternative = (BestAlternative_1 * 1) + (BestAlternative_2 * 2) + (BestAlternative_3 * 3) # Parameters to be estimated # Arguments: # 1 Name for report. Typically, the same as the variable # 2 Starting value
# # Not needed in test # pylint: disable=missing-function-docstring, missing-class-docstring import pandas as pd import biogeme.database as db df1 = pd.DataFrame({ 'Person': [1, 1, 1, 2, 2], 'Exclude': [0, 0, 1, 0, 1], 'Variable1': [1, 2, 3, 4, 5], 'Variable2': [10, 20, 30, 40, 50], 'Choice': [1, 2, 3, 1, 2], 'Av1': [0, 1, 1, 1, 1], 'Av2': [1, 1, 1, 1, 1], 'Av3': [0, 1, 1, 1, 1] }) myData1 = db.Database('test', df1) df2 = pd.DataFrame({ 'Person': [1, 1, 1, 2, 2], 'Exclude': [0, 0, 1, 0, 1], 'Variable1': [10, 20, 30, 40, 50], 'Variable2': [100, 200, 300, 400, 500], 'Choice': [1, 2, 3, 1, 2], 'Av1': [0, 1, 1, 1, 1], 'Av2': [1, 1, 1, 1, 1], 'Av3': [0, 1, 1, 1, 1] }) myData2 = db.Database('test', df2)
Estimation of a mixtures of logit models where the integral is approximated using MonteCarlo integration, with Modified Latin Hypercube Sampling draws. """ # pylint: disable=invalid-name, undefined-variable import pandas as pd import biogeme.database as db import biogeme.biogeme as bio import biogeme.models as models from biogeme.expressions import Beta, bioDraws, MonteCarlo, log pandas = pd.read_csv('swissmetro.dat', sep='\t') database = db.Database('swissmetro', pandas) # The following statement allows you to use the names of the variable # as Python variable. globals().update(database.variables) # Removing some observations exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0 database.remove(exclude) ASC_CAR = Beta('ASC_CAR', 0, None, None, 0) ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0) ASC_SM = Beta('ASC_SM', 0, None, None, 1) B_TIME = Beta('B_TIME', 0, None, None, 0) B_TIME_S = Beta('B_TIME_S', 1, None, None, 0) B_COST = Beta('B_COST', 0, None, None, 0)
def run_simulation(data_file_directory_for_simulation, data_file_name_for_simulation, output_directory_for_simulation,
                   betas, household_income_limit):
    """Simulate telecommuting probabilities with a binary logit model and save them to a CSV file.

    :author: Antonin Danalet, based on the example '01logit_simul.py' by Michel Bierlaire, EPFL, on biogeme.epfl.ch

    Two alternatives: work from home at least some times (numbered 1), or not (numbered 3).

    The function reads the persons file, simulates both choice probabilities with Biogeme, forces the
    probability of telecommuting to 0 for persons with ``employed`` equal to 0 or -99 and for persons with
    ``position_in_bus`` equal to 3, draws one 0/1 realisation per person and writes the result to
    'persons_from_SynPop_with_probability_telecommuting.csv' in the output directory.

    :param data_file_directory_for_simulation: directory of the input file; it is combined with the file name
        using ``/``, so a ``pathlib.Path``-like object is expected.
    :param data_file_name_for_simulation: name of the semicolon-separated file describing the persons.
    :param output_directory_for_simulation: directory that is the working directory while Biogeme simulates and
        in which the result file is written (also combined with ``/``).
    :param betas: parameter values passed to ``biogeme.simulate`` as ``theBetaValues``.
    :param household_income_limit: threshold; the income indicator is ``hh_income < household_income_limit``.
    :return: None
    """
    # Read the data. ``sep`` is given by keyword: recent pandas versions no longer accept it positionally.
    df_persons = pd.read_csv(data_file_directory_for_simulation / data_file_name_for_simulation, sep=';')
    database = db.Database('persons', df_persons)

    # The following statement allows you to use the names of the variables as Python variables.
    globals().update(database.variables)

    # Parameters of the model (the last argument is 1 for the parameters that are kept fixed)
    alternative_specific_constant = Beta('alternative_specific_constant', 0, None, None, 0)
    b_no_post_school_education = Beta('b_no_post_school_education', 0, None, None, 0)
    b_secondary_education = Beta('b_secondary_education', 0, None, None, 0)
    b_tertiary_education = Beta('b_tertiary_education', 0, None, None, 0)
    b_university = Beta('b_university', 0, None, None, 1)
    b_male = Beta('b_male', 0, None, None, 0)
    b_public_transport_connection_quality_na_home = Beta('b_public_transport_connection_quality_na_home',
                                                         0, None, None, 0)
    b_public_transport_connection_quality_a_work = Beta('b_public_transport_connection_quality_are_a_work',
                                                        0, None, None, 1)
    b_rural_work = Beta('b_rural_work', 0, None, None, 0)
    b_home_work_distance = Beta('b_home_work_distance', 0, None, None, 0)
    b_business_sector_agriculture = Beta('b_business_sector_agriculture', 0, None, None, 0)
    b_business_sector_production = Beta('b_business_sector_production', 0, None, None, 0)
    b_business_sector_wholesale = Beta('b_business_sector_wholesale', 0, None, None, 1)
    b_business_sector_retail = Beta('b_business_sector_retail', 0, None, None, 0)
    b_business_sector_gastronomy = Beta('b_business_sector_gastronomy', 0, None, None, 0)
    b_business_sector_finance = Beta('b_business_sector_finance', 0, None, None, 1)
    b_business_sector_services_fc = Beta('b_business_sector_services_fc', 0, None, None, 0)
    b_business_sector_other_services = Beta('b_business_sector_other_services', 0, None, None, 1)
    b_business_sector_others = Beta('b_business_sector_others', 0, None, None, 1)
    b_business_sector_non_movers = Beta('b_business_sector_non_movers', 0, None, None, 0)
    b_executives = Beta('b_executives', 0, None, None, 0)
    b_german = Beta('b_german', 0, None, None, 0)
    b_hh_income_8000_or_less = Beta('b_hh_income_8000_or_less', 0, None, None, 0)
    # NOTE(review): this parameter is used in the utility function below but had no definition in this
    # function, which leads to a NameError unless a module-level name exists. It is declared here as a free
    # parameter with starting value 0, presumably to be overridden by ``betas`` -- confirm the intended
    # status and value against the estimation script.
    b_nationality_ch_germany_france_italy_nw_e = Beta('b_nationality_ch_germany_france_italy_nw_e',
                                                      0, None, None, 0)

    # Definition of new variables
    no_post_school_educ = education == 1
    secondary_education = education == 2
    tertiary_education = education == 3
    university = education == 4

    male = (sex == 1)

    public_transport_quality_NA_home = (public_transport_connection_quality_ARE_home == 5)
    public_transport_quality_A_work = (public_transport_connection_quality_ARE_work == 1)

    # Negative distances contribute 0; the distance is divided by 100000.
    home_work_distance = (home_work_crow_fly_distance * (home_work_crow_fly_distance >= 0.0) / 100000.0)

    business_sector_agriculture = type_1 == 1
    business_sector_retail = type_1 == 4
    business_sector_gastronomy = type_1 == 5
    business_sector_finance = type_1 == 6
    business_sector_production = type_1 == 2
    business_sector_wholesale = type_1 == 3
    business_sector_services_fC = type_1 == 7
    business_sector_other_services = type_1 == 8
    business_sector_others = type_1 == 9
    business_sector_non_movers = type_1 == 10

    german = language == 1

    nationality_switzerland = nation == 0
    nationality_germany_austria = nation == 1
    nationality_italy_vatican = nation == 2
    nationality_france_monaco_s_marino = nation == 3
    nationality_northwestern_europe = nation == 4
    nationality_eastern_europe = nation == 7

    hh_income_8000_or_less = hh_income < household_income_limit

    # Indicator of 0 < position_in_bus < 19, written as a product of two comparisons.
    executives = (0 < position_in_bus) * (position_in_bus < 19)

    rural_work = urban_rural_typology_work == 3

    # Utility
    utility_function_telecommuting = (
        alternative_specific_constant
        + b_executives * executives
        + b_no_post_school_education * no_post_school_educ
        + b_secondary_education * secondary_education
        + b_tertiary_education * tertiary_education
        + b_university * university
        + b_male * male
        + b_public_transport_connection_quality_na_home * public_transport_quality_NA_home
        + b_public_transport_connection_quality_a_work * public_transport_quality_A_work
        + b_rural_work * rural_work
        + b_home_work_distance * home_work_distance
        + models.piecewiseFormula(age, [0, 20, 35, 75, 200])
        + b_business_sector_agriculture * business_sector_agriculture
        + b_business_sector_retail * business_sector_retail
        + b_business_sector_gastronomy * business_sector_gastronomy
        + b_business_sector_finance * business_sector_finance
        + b_business_sector_production * business_sector_production
        + b_business_sector_wholesale * business_sector_wholesale
        + b_business_sector_services_fc * business_sector_services_fC
        + b_business_sector_other_services * business_sector_other_services
        + b_business_sector_others * business_sector_others
        + b_business_sector_non_movers * business_sector_non_movers
        + b_german * german
        + b_nationality_ch_germany_france_italy_nw_e * nationality_switzerland
        + b_nationality_ch_germany_france_italy_nw_e * nationality_germany_austria
        + b_nationality_ch_germany_france_italy_nw_e * nationality_italy_vatican
        + b_nationality_ch_germany_france_italy_nw_e * nationality_france_monaco_s_marino
        + b_nationality_ch_germany_france_italy_nw_e * nationality_northwestern_europe
        + b_nationality_ch_germany_france_italy_nw_e * nationality_eastern_europe
        + models.piecewiseFormula(work_percentage, [0, 90, 101])
        + b_hh_income_8000_or_less * hh_income_8000_or_less
    )
    utility_function_no_telecommuting = 0

    # Associate utility functions with the numbering of alternatives
    utility_functions_with_numbering_of_alternatives = {1: utility_function_telecommuting,  # Yes or sometimes
                                                        3: utility_function_no_telecommuting}  # No

    availability_conditions = {1: 1,  # Always available
                               3: 1}  # Always available

    # The choice model is a logit, with availability conditions
    prob_telecommuting = models.logit(utility_functions_with_numbering_of_alternatives, availability_conditions, 1)
    prob_no_telecommuting = models.logit(utility_functions_with_numbering_of_alternatives,
                                         availability_conditions, 3)

    simulate = {'Prob. telecommuting': prob_telecommuting,
                'Prob. no telecommuting': prob_no_telecommuting}

    # Create the Biogeme object
    biogeme = bio.BIOGEME(database, simulate)
    biogeme.modelName = 'logit_telecommuting_simul'

    # Define level of verbosity
    logger = msg.bioMessage()
    logger.setWarning()

    # Change the working directory, so that biogeme writes in the output folder. The previous working
    # directory is restored even if the simulation fails.
    standard_directory = os.getcwd()
    os.chdir(output_directory_for_simulation)
    try:
        results = biogeme.simulate(theBetaValues=betas)
    finally:
        os.chdir(standard_directory)

    # Append the simulated probabilities as new columns of the persons table.
    df_persons = pd.concat([df_persons, results], axis=1)

    # For unemployed people, fix probability of doing some home office to 0 (and probability of not doing to 1).
    df_persons.loc[df_persons.employed == 0, 'Prob. telecommuting'] = 0.0  # Unemployed people
    df_persons.loc[df_persons.employed == 0, 'Prob. no telecommuting'] = 1.0  # Unemployed people
    df_persons.loc[df_persons.employed == -99, 'Prob. telecommuting'] = 0.0  # Other people
    df_persons.loc[df_persons.employed == -99, 'Prob. no telecommuting'] = 1.0  # Other people

    # By definition, apprentices don't work from home (because they were not asked in the MTMC)
    df_persons.loc[df_persons.position_in_bus == 3, 'Prob. telecommuting'] = 0.0
    df_persons.loc[df_persons.position_in_bus == 3, 'Prob. no telecommuting'] = 1.0

    # Add a realisation of the probability: one uniform draw per person, compared with the probability of
    # telecommuting. The draws are kept in a local array, so no scratch column is added to the table.
    random_draws = np.random.rand(len(df_persons))
    df_persons['telecommuting_model'] = np.where(random_draws < df_persons['Prob. telecommuting'].to_numpy(), 1, 0)

    # Save the file
    data_file_name = 'persons_from_SynPop_with_probability_telecommuting.csv'
    df_persons.to_csv(output_directory_for_simulation / data_file_name, sep=',', index=False)
Example of a logit model, with a Box-Cox transform of variables. Three alternatives: Train, Car and Swissmetro SP data """ import pandas as pd import biogeme.database as db import biogeme.biogeme as bio import biogeme.models as models import biogeme.messaging as msg from biogeme.expressions import Beta, DefineVariable # Read the data df = pd.read_csv('swissmetro.dat', '\t') database = db.Database('swissmetro', df) # The Pandas data structure is available as database.data. Use all the # Pandas functions to invesigate the database #print(database.data.describe()) # The following statement allows you to use the names of the variable # as Python variable. globals().update(database.variables) # Removing some observations can be done directly using pandas. #remove = (((database.data.PURPOSE != 1) & # (database.data.PURPOSE != 3)) | # (database.data.CHOICE == 0)) #database.data.drop(database.data[remove].index,inplace=True)
from pathlib import Path

import numpy as np
import pandas as pd

import biogeme.database as db
import biogeme.biogeme as bio

# Show every column when pandas prints a data frame.
pd.set_option('display.max_columns', None)

# NOTE(review): absolute, machine-specific path; adapt it to the local
# checkout before running.
mdata_folder = Path("C:/Users/langzx/Desktop/github/DCM/data")

# Read the data file and replace every missing value by -2.
pandas = pd.read_table(mdata_folder / "widedata.dat")
pandas = pandas.fillna(-2)
database = db.Database("widedata", pandas)

# Kept at this position (after the data has been loaded) so that the
# names it brings in keep the same precedence over the names defined
# above. NOTE(review): presumably provides Beta, which is not imported
# explicitly in this part of the script -- confirm.
from headers import *

# Removing some observations can be done directly using pandas.
# Rows whose Choice equals -2 are dropped; this includes the rows whose
# Choice was missing and has been filled with -2 above.
remove = (database.data.Choice == -2)
database.data.drop(database.data[remove].index, inplace=True)

### Coefficients
ASC_V = Beta('ASC_V', 0.0, -1000, 1000, 0)
ASC_C = Beta('ASC_C', 0.0, -1000, 1000, 0)
coef_wetland = Beta('coef_wetland', 0.0, -1000, 1000, 0)
coef_cc = Beta('coef_cc', 0.0, -1000, 1000, 0)
# The parameter name carries no trailing space, consistently with the
# other parameters, whose names equal their Python variable names.
coef_nm = Beta('coef_nm', 0.0, -1000, 1000, 0)
coef_pay = Beta('coef_pay', 0.0, -1000, 1000, 0)

### Variables