Beispiel #1
0
def checkDerivatives(theFunction, x, names=None, logg=False):
    """Compare the analytical derivatives of a function with finite
       difference approximations.

    :param theFunction: a callable that takes a vector as argument and
                  returns a tuple ``(f, g, h)``:

          - ``f`` is the value of the function,
          - ``g`` is its gradient,
          - ``h`` is its hessian.

    :type theFunction: function

    :param x: point at which the derivatives are checked
    :type x: numpy.array

    :param names: labels of the entries of x, used only for reporting.
                  If None and ``logg`` is True, labels ``x[0]``, ``x[1]``,
                  ... are generated.
    :type names: list(string)
    :param logg: if True, a comparison table is sent to the logger
                  (level "detailed").
    :type logg: bool

    :return: tuple f, g, h, gdiff, hdiff where

          - f is the value of the function at x,
          - g is the analytical gradient,
          - h is the analytical hessian,
          - gdiff is the analytical gradient minus its finite difference
                 approximation,
          - hdiff is the analytical hessian minus its finite difference
                 approximation.

    :rtype: float, numpy.array,numpy.array,  numpy.array,numpy.array
    """
    value, gradient, hessian = theFunction(x)

    # First order check.
    fd_gradient = findiff_g(theFunction, x)
    gdiff = gradient - fd_gradient
    if logg:
        logger = msg.bioMessage()
        if names is None:
            names = [f'x[{i}]' for i in range(len(x))]
        logger.detailed('x\t\tGradient\tFinDiff\t\tDifference')
        for k, delta in enumerate(gdiff):
            report = (
                f'{names[k]:15}\t{gradient[k]:+E}\t'
                f'{fd_gradient[k]:+E}\t{delta:+E}'
            )
            logger.detailed(report)

    # Second order check. The finite difference hessian is computed only
    # after the gradient table has been reported.
    fd_hessian = findiff_H(theFunction, x)
    hdiff = hessian - fd_hessian
    if logg:
        logger.detailed('Row\t\tCol\t\tHessian\tFinDiff\t\tDifference')
        size = len(hdiff)
        for row in range(size):
            for col in range(size):
                report = (
                    f'{names[row]:15}\t{names[col]:15}\t{hessian[row,col]:+E}\t'
                    f'{fd_hessian[row,col]:+E}\t{hdiff[row,col]:+E}'
                )
                logger.detailed(report)

    return value, gradient, hessian, gdiff, hdiff
Beispiel #2
0
     B_COST * SM_COST_SCALED
# Utility of the car alternative: travel time enters through a Box-Cox
# transform with parameter LAMBDA.
V3 = (
    ASC_CAR
    + B_TIME * models.boxcox(CAR_TT_SCALED, LAMBDA)
    + B_COST * CAR_CO_SCALED
)

# Map each alternative number to its utility function
V = {
    1: V1,
    2: V2,
    3: V3,
}

# Map each alternative number to its availability condition
av = {
    1: TRAIN_AV_SP,
    2: SM_AV,
    3: CAR_AV_SP,
}

# Model definition: contribution of each observation to the
# log likelihood function.
logprob = models.loglogit(V, av, CHOICE)

# Level of verbosity. Alternatives to setSilent() are setWarning(),
# setGeneral() and setDetailed().
logger = msg.bioMessage()
logger.setSilent()

# Build the Biogeme object
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = '08boxcox'

# Estimate the parameters and display them
results = biogeme.estimate()
pandasResults = results.getEstimatedParameters()
print(pandasResults)
Beispiel #3
0
    def __init__(self, name, pandasDatabase):
        """Constructor

        Stores the data frame, initializes the bookkeeping attributes
        (panel information, random number generators, draws) and audits
        the object. Warnings reported by the audit are logged; errors
        are logged and then raised.

        :param name: name of the database.
        :type name: string

        :param pandasDatabase: data stored in a pandas data frame.
        :type pandasDatabase: pandas.DataFrame

        :raise biogemeError: if the audit of the database reports
            at least one error.

        """
        self.logger = msg.bioMessage()

        # Name of the database. Used mainly for the file name when
        # dumping data.
        self.name = name

        # Pandas data frame containing the data. Both attributes refer to
        # the very same object here (no copy is made).
        self.data = pandasDatabase
        self.fullData = pandasDatabase

        # self.variables is initialized by _generateHeaders()
        self.variables = None
        self._generateHeaders()

        # Number of observations removed by the function Database.remove
        self.excludedData = 0

        # Name of the column identifying the individuals in a panel
        # data context. None if data is not panel.
        self.panelColumn = None

        # Map identifying the range of observations for each
        # individual in a panel data context. None if data is not
        # panel.
        self.individualMap = None
        self.fullIndividualMap = None

        # Initialize the dictionary containing random number
        # generators with a series of native generators.
        self._initNativeRandomNumberGenerators()

        # Dictionary containing user defined random number
        # generators. Defined by the function
        # Database.setRandomNumberGenerators that checks that
        # reserved keywords are not used. The element of the
        # dictionary is a tuple with two elements: (0) the function
        # generating the draws, and (1) a string describing the type
        # of draws.
        self.userRandomNumberGenerators = dict()

        # Number of draws generated by the function
        # Database.generateDraws. Value 0 if this function is not called.
        self.numberOfDraws = 0

        # Types of draws for Monte Carlo integration
        self.typesOfDraws = {}

        self._auditDone = False

        # Draws for Monte-Carlo integration
        self.theDraws = None

        # Availability expression to check
        self._avail = None

        # Choice expression to check
        self._choice = None

        # Expression to check
        self._expression = None

        # The audit is run last, once every attribute above exists.
        listOfErrors, listOfWarnings = self._audit()
        if listOfWarnings:
            self.logger.warning('\n'.join(listOfWarnings))
        if listOfErrors:
            # Build the message once; it is both logged and raised.
            errorMessage = '\n'.join(listOfErrors)
            self.logger.warning(errorMessage)
            raise excep.biogemeError(errorMessage)
Beispiel #4
0
def run_simulation(data_file_directory_for_simulation, data_file_name_for_simulation, output_directory_for_simulation,
                   betas, household_income_limit):
    """Simulate the probability of telecommuting with a binary logit model.

    :author: Antonin Danalet, based on the example '01logit_simul.py' by Michel Bierlaire, EPFL, on biogeme.epfl.ch

    Two alternatives: work from home at least some times (1), or not (3).
    The persons file is read, the two probabilities are simulated with
    Biogeme, the probabilities are forced to 0/1 for people who cannot
    telecommute, one random realisation is drawn, and the result is saved as
    'persons_from_SynPop_with_probability_telecommuting.csv' in the output
    directory.

    :param data_file_directory_for_simulation: directory of the input file;
        must support the ``/`` operator (e.g. ``pathlib.Path``).
    :param data_file_name_for_simulation: name of the ';'-separated input file.
    :param output_directory_for_simulation: directory where Biogeme and this
        function write their output; must support the ``/`` operator.
    :param betas: parameter values passed to ``biogeme.simulate`` as
        ``theBetaValues``.
    :param household_income_limit: threshold; the low-income dummy is
        ``hh_income < household_income_limit``.
    :return: None. The result is written to disk.
    """

    # Read the data. 'sep' is passed by keyword: recent pandas versions
    # no longer accept it positionally.
    df_persons = pd.read_csv(data_file_directory_for_simulation / data_file_name_for_simulation, sep=';')
    database = db.Database('persons', df_persons)

    # The following statement allows you to use the names of the variable as Python variable.
    # The column names used below (education, sex, ...) are resolved through the module globals.
    globals().update(database.variables)

    # Parameters (last argument: 0 = free, 1 = fixed)
    alternative_specific_constant = Beta('alternative_specific_constant', 0, None, None, 0)
    b_no_post_school_education = Beta('b_no_post_school_education', 0, None, None, 0)
    b_secondary_education = Beta('b_secondary_education', 0, None, None, 0)
    b_tertiary_education = Beta('b_tertiary_education', 0, None, None, 0)
    b_university = Beta('b_university', 0, None, None, 1)
    b_male = Beta('b_male', 0, None, None, 0)
    b_public_transport_connection_quality_na_home = Beta('b_public_transport_connection_quality_na_home',
                                                         0, None, None, 0)
    b_public_transport_connection_quality_a_work = Beta('b_public_transport_connection_quality_are_a_work',
                                                        0, None, None, 1)
    b_rural_work = Beta('b_rural_work', 0, None, None, 0)
    b_home_work_distance = Beta('b_home_work_distance', 0, None, None, 0)
    b_business_sector_agriculture = Beta('b_business_sector_agriculture', 0, None, None, 0)
    b_business_sector_production = Beta('b_business_sector_production', 0, None, None, 0)
    b_business_sector_wholesale = Beta('b_business_sector_wholesale', 0, None, None, 1)
    b_business_sector_retail = Beta('b_business_sector_retail', 0, None, None, 0)
    b_business_sector_gastronomy = Beta('b_business_sector_gastronomy', 0, None, None, 0)
    b_business_sector_finance = Beta('b_business_sector_finance', 0, None, None, 1)
    b_business_sector_services_fc = Beta('b_business_sector_services_fc', 0, None, None, 0)
    b_business_sector_other_services = Beta('b_business_sector_other_services', 0, None, None, 1)
    b_business_sector_others = Beta('b_business_sector_others', 0, None, None, 1)
    b_business_sector_non_movers = Beta('b_business_sector_non_movers', 0, None, None, 0)
    b_executives = Beta('b_executives', 0, None, None, 0)
    b_german = Beta('b_german', 0, None, None, 0)
    # NOTE(review): this parameter is used in the utility function below but
    # was not declared in this function. It is declared here as a free
    # parameter, like its neighbours - confirm the status against the
    # estimation script.
    b_nationality_ch_germany_france_italy_nw_e = Beta('b_nationality_ch_germany_france_italy_nw_e',
                                                      0, None, None, 0)
    b_hh_income_8000_or_less = Beta('b_hh_income_8000_or_less', 0, None, None, 0)

    # Definition of new variables
    no_post_school_educ = education == 1
    secondary_education = education == 2
    tertiary_education = education == 3
    university = education == 4

    male = (sex == 1)

    public_transport_quality_NA_home = (public_transport_connection_quality_ARE_home == 5)
    public_transport_quality_A_work = (public_transport_connection_quality_ARE_work == 1)

    # Negative distances are zeroed out before scaling.
    home_work_distance = (home_work_crow_fly_distance * (home_work_crow_fly_distance >= 0.0) / 100000.0)

    business_sector_agriculture = type_1 == 1
    business_sector_production = type_1 == 2
    business_sector_wholesale = type_1 == 3
    business_sector_retail = type_1 == 4
    business_sector_gastronomy = type_1 == 5
    business_sector_finance = type_1 == 6
    business_sector_services_fC = type_1 == 7
    business_sector_other_services = type_1 == 8
    business_sector_others = type_1 == 9
    business_sector_non_movers = type_1 == 10
    german = language == 1
    nationality_switzerland = nation == 0
    nationality_germany_austria = nation == 1
    nationality_italy_vatican = nation == 2
    nationality_france_monaco_s_marino = nation == 3
    nationality_northwestern_europe = nation == 4
    nationality_eastern_europe = nation == 7
    hh_income_8000_or_less = hh_income < household_income_limit
    executives = (0 < position_in_bus) * (position_in_bus < 19)
    rural_work = urban_rural_typology_work == 3

    #  Utility
    utility_function_telecommuting = (
        alternative_specific_constant
        + b_executives * executives
        + b_no_post_school_education * no_post_school_educ
        + b_secondary_education * secondary_education
        + b_tertiary_education * tertiary_education
        + b_university * university
        + b_male * male
        + b_public_transport_connection_quality_na_home * public_transport_quality_NA_home
        + b_public_transport_connection_quality_a_work * public_transport_quality_A_work
        + b_rural_work * rural_work
        + b_home_work_distance * home_work_distance
        + models.piecewiseFormula(age, [0, 20, 35, 75, 200])
        + b_business_sector_agriculture * business_sector_agriculture
        + b_business_sector_retail * business_sector_retail
        + b_business_sector_gastronomy * business_sector_gastronomy
        + b_business_sector_finance * business_sector_finance
        + b_business_sector_production * business_sector_production
        + b_business_sector_wholesale * business_sector_wholesale
        + b_business_sector_services_fc * business_sector_services_fC
        + b_business_sector_other_services * business_sector_other_services
        + b_business_sector_others * business_sector_others
        + b_business_sector_non_movers * business_sector_non_movers
        + b_german * german
        + b_nationality_ch_germany_france_italy_nw_e * nationality_switzerland
        + b_nationality_ch_germany_france_italy_nw_e * nationality_germany_austria
        + b_nationality_ch_germany_france_italy_nw_e * nationality_italy_vatican
        + b_nationality_ch_germany_france_italy_nw_e * nationality_france_monaco_s_marino
        + b_nationality_ch_germany_france_italy_nw_e * nationality_northwestern_europe
        + b_nationality_ch_germany_france_italy_nw_e * nationality_eastern_europe
        + models.piecewiseFormula(work_percentage, [0, 90, 101])
        + b_hh_income_8000_or_less * hh_income_8000_or_less
    )
    utility_function_no_telecommuting = 0

    # Associate utility functions with the numbering of alternatives
    utility_functions_with_numbering_of_alternatives = {1: utility_function_telecommuting,  # Yes or sometimes
                                                        3: utility_function_no_telecommuting}  # No

    availability_conditions = {1: 1,  # Always available
                               3: 1}  # Always available

    # The choice model is a logit, with availability conditions
    prob_telecommuting = models.logit(utility_functions_with_numbering_of_alternatives, availability_conditions, 1)
    prob_no_telecommuting = models.logit(utility_functions_with_numbering_of_alternatives, availability_conditions, 3)

    simulate = {'Prob. telecommuting': prob_telecommuting,
                'Prob. no telecommuting': prob_no_telecommuting}

    # Create the Biogeme object
    biogeme = bio.BIOGEME(database, simulate)
    biogeme.modelName = 'logit_telecommuting_simul'

    # Define level of verbosity (alternatives: setSilent, setGeneral, setDetailed)
    logger = msg.bioMessage()
    logger.setWarning()

    # Change the working directory, so that biogeme writes in the correct folder.
    # The previous directory is restored even if the simulation fails.
    standard_directory = os.getcwd()
    os.chdir(output_directory_for_simulation)
    try:
        # The parameter values are the ones supplied by the caller.
        results = biogeme.simulate(theBetaValues=betas)
    finally:
        os.chdir(standard_directory)
    df_persons = pd.concat([df_persons, results], axis=1)

    # Fix the probability of doing some home office to 0 (and of not doing it to 1) for:
    # - unemployed people (employed == 0) and other people (employed == -99),
    # - apprentices (position_in_bus == 3): by definition they don't work from home
    #   (because they were not asked in the MTMC).
    cannot_telecommute = df_persons.employed.isin([0, -99]) | (df_persons.position_in_bus == 3)
    df_persons.loc[cannot_telecommute, 'Prob. telecommuting'] = 0.0
    df_persons.loc[cannot_telecommute, 'Prob. no telecommuting'] = 1.0

    # Add a realisation of the probability: one uniform draw per person.
    random_draws = np.random.rand(len(df_persons))
    df_persons['telecommuting_model'] = np.where(
        random_draws < df_persons['Prob. telecommuting'].to_numpy(), 1, 0)

    # Save the file
    data_file_name = 'persons_from_SynPop_with_probability_telecommuting.csv'
    df_persons.to_csv(output_directory_for_simulation / data_file_name, sep=',', index=False)