예제 #1
0
    RandomVariable, Integrate, Elem, bioNormalCdf, exp

# Read the tab-separated data file and wrap it in a Biogeme database
df = pd.read_csv('optima.dat', sep='\t')
database = db.Database('optima', df)

# Inject every entry of database.variables into the module namespace, so
# that the variables can be referenced directly by name in the
# expressions below (presumably this is where Choice comes from).
globals().update(database.variables)

# Exclude observations such that the chosen alternative is -1
database.remove(Choice == -1.0)

# Read the estimates from the structural equation estimation.
# The pickle file is not created here: if it is missing, the user is told
# which script generates it and the program stops.
try:
    structResults = res.bioResults(pickleFile='02oneLatentOrdered.pickle')
except FileNotFoundError:
    print('Run first the script 02oneLatentOrdered.py in order to generate the file '
          '02oneLatentOrdered.pickle.')
    # NOTE(review): sys.exit() without argument terminates with exit
    # status 0, although this is an error path.
    sys.exit()
# Values of the estimated parameters, as returned by getBetaValues()
structBetas = structResults.getBetaValues()

### Variables

# Piecewise linear definition of income
ScaledIncome = DefineVariable('ScaledIncome', CalculatedIncome / 1000, database)
thresholds = [None, 4, 6, 8, 10, None]
formulaIncome = models.piecewiseFormula(ScaledIncome,
                                        thresholds,
                                        [structBetas['beta_ScaledIncome_lessthan_4'],
                                         structBetas['beta_ScaledIncome_4_6'],
# Variable_name = ['Income_4000_less','Income_12000_more','age_60_more','age_35_less','moreThanOneCar','haveLicense',
#                  'male','highEducation','fulltimeJob','intercept']
#
# variable_dict = {'age_60_more':age_60_more,'Chinese':Chinese,
#                  'moreThanOneCar':moreThanOneCar,'male':male,
#                  'fulltimeJob':fulltimeJob,'age_35_less':age_35_less, 'kid_under18':kid_under18,
#                  'Commute':Commute,'intercept':1}

# Labels used to build the names of the parameters that are looked up in
# the estimation results below.
Variable_name = [
    'age_60_more', 'Chinese', 'male', 'fulltimeJob', 'age_35_less',
    'highEducation', 'kid_under18', 'Commute', 'intercept'
]

### Results read from Seq_LatentOrdered_simul.pickle
### (presumably the simultaneous estimation -- confirm)
structResults = res.bioResults(pickleFile='Seq_LatentOrdered_simul.pickle')
structBetas = structResults.getBetaValues()
# coef[attitude][parameter name] -> value found in structBetas, where the
# parameter name has the form 'coef_<variable>_<attitude>'.
coef = {}
attitude_name = ['Pro_Walk', 'Pro_PT', 'Pro_RH', 'Pro_Drive']
for att in attitude_name:
    coef[att] = {}
    for var in Variable_name:
        var_name = 'coef_' + var + '_' + att
        # Every (variable, attitude) combination is looked up directly in
        # structBetas; no fallback is provided for a missing name.
        coef[att][var_name] = structBetas[var_name]

### separate results
# coef = {}
# attitude_name = ['Pro_Walk','Pro_PT','Pro_RH','Pro_Drive']
# for att in attitude_name:
#     structResults = res.bioResults(pickleFile='Seq_LatentOrdered_'+ att +'.pickle')
#     structBetas = structResults.getBetaValues()
예제 #3
0
    bioDraws, MonteCarlo, exp, log

# Read the tab-separated data file and wrap it in a Biogeme database
df = pd.read_csv('optima.dat', sep='\t')
database = db.Database('optima', df)

# Inject every entry of database.variables into the module namespace, so
# that the variables can be referenced directly by name in the
# expressions below (presumably this is where Choice and
# CalculatedIncome come from).
globals().update(database.variables)

# Exclude observations such that the chosen alternative is -1
database.remove(Choice == -1.0)

# Read the estimates from the structural equation estimation.
# The pickle file is not created here: if it is missing, the user is told
# which script generates it and the program stops.
try:
    structResults = res.bioResults(pickleFile='02oneLatentOrdered.pickle')
except FileNotFoundError:
    print(
        'Run first the script 02oneLatentOrdered.py in order to generate the file '
        '02oneLatentOrdered.pickle.')
    # NOTE(review): sys.exit() without argument terminates with exit
    # status 0, although this is an error path.
    sys.exit()
# Values of the estimated parameters, as returned by getBetaValues()
structBetas = structResults.getBetaValues()

### Variables

# Piecewise linear definition of income.
# The income is divided by 1000 before being split at the thresholds
# 4, 6, 8 and 10. The None entries at both ends presumably stand for
# unbounded first and last segments -- confirm against the documentation
# of models.piecewiseVariables.
ScaledIncome = DefineVariable('ScaledIncome', CalculatedIncome / 1000,
                              database)
thresholds = [None, 4, 6, 8, 10, None]
piecewiseVariables = models.piecewiseVariables(ScaledIncome, thresholds)
formulaIncome = structBetas['beta_ScaledIncome_lessthan_4'] * piecewiseVariables[0] + \
예제 #4
0
# Probability of alternative 2 computed from the utilities V_after, with
# the same nests as the base model (presumably the slow modes once the
# distance has been modified by delta_dist -- confirm).
prob_sm_after = models.nested(V_after, None, nests, 2)

# Arc elasticity for each observation: relative change of the probability
# divided by the relative change of the distance, that is
#   ((P_after - P) / P) / (delta_dist / distance_km)
direct_elas_sm_dist = (prob_sm_after - prob_sm) * \
    distance_km / (prob_sm * delta_dist)

# Formulas to simulate; the keys become the column names of the
# simulation output used below.
simulate = {
    'weight': normalizedWeight,
    'Prob. slow modes': prob_sm,
    'direct_elas_sm_dist': direct_elas_sm_dist
}

biogeme = bio.BIOGEME(database, simulate)
biogeme.modelName = '05nestedElasticitiesCI_Bootstrap'

# Read the estimation results from the file
results = res.bioResults(pickleFile='01nestedEstimation.pickle')

# simulatedValues is a Panda dataframe with the same number of rows as
# the database, and as many columns as formulas to simulate.
simulatedValues = biogeme.simulate(results.getBetaValues())

# We calculate the elasticities
simulatedValues['Weighted prob. slow modes'] = simulatedValues['weight'] * \
    simulatedValues['Prob. slow modes']

# Normalization constant: sum of the weighted probabilities
denominator_sm = simulatedValues['Weighted prob. slow modes'].sum()

# Aggregate elasticity: sum of the individual elasticities, each one
# weighted by its share of the total weighted probability.
# Note that this rebinds direct_elas_sm_dist, which was the formula
# defined above, to the aggregated value.
direct_elas_sm_dist = (simulatedValues['Weighted prob. slow modes'] *
                       simulatedValues['direct_elas_sm_dist'] /
                       denominator_sm).sum()
print(f'Aggregate direct elasticity of slow modes wrt distance: '
예제 #5
0
    def quickEstimate(self,
                      algorithm=opt.simpleBoundsNewtonAlgorithmForBiogeme,
                      algoParameters=None):
        """Estimate the parameters of the model, without any extra output.

        The optimization is run from the initial values of the
        parameters, and only the final value of the log likelihood is
        calculated afterwards. The initial log likelihood and the
        derivatives at the solution are not calculated.

        :param algorithm: optimization algorithm to use for the
               maximum likelihood estimation.
               Default: Biogeme's Newton's algorithm with simple bounds.
        :type algorithm: function

        :param algoParameters: parameters to transfer to the
               optimization algorithm
        :type algoParameters: dict

        :return: object containing the estimation results.
        :rtype: biogeme.results.bioResults

        Example::

            # Create an instance of biogeme
            biogeme  = bio.BIOGEME(database, logprob)

            # Gives a name to the model
            biogeme.modelName = 'mymodel'

            # Estimate the parameters
            results = biogeme.quickEstimate()

        :raises biogemeError: if no expression has been provided for
            the likelihood, or if there is no parameter to estimate.

        """
        # Nothing can be estimated without a likelihood and free parameters
        if self.loglike is None:
            raise excep.biogemeError(
                'No log likelihood function has been specificed')
        if not len(self.freeBetaNames):
            raise excep.biogemeError(
                'There is no parameter to estimate '
                f'in the formula: {self.loglike}.')

        self.algorithm = algorithm
        self.algoParameters = algoParameters

        clock_start = datetime.now()
        solution, messages = self.optimize(self.betaInitValues)
        ## Running time of the optimization algorithm
        messages['Optimization time'] = datetime.now() - clock_start
        ## Information provided by the optimization algorithm after completion.
        self.optimizationMessages = messages

        # Only the function value is available: the three other entries
        # of the tuple (filled with derivative information by estimate)
        # are left to None.
        final_loglike = self.calculateLikelihood(solution, scaled=False)
        function_only = (final_loglike, None, None, None)
        raw = res.rawResults(self,
                             solution,
                             function_only,
                             bootstrap=self.bootstrap_results)
        return res.bioResults(raw)
예제 #6
0
    def estimate(self,
                 bootstrap=0,
                 algorithm=opt.simpleBoundsNewtonAlgorithmForBiogeme,
                 algoParameters=None,
                 cfsqp_default_bounds=1000.0,
                 saveIterations=False,
                 file_iterations='__savedIterations.txt'):
        """Estimate the parameters of the model.

        The initial log likelihood is calculated, the optimization
        algorithm is run, and the log likelihood and its derivatives
        are evaluated at the solution. If the analytical hessian
        contains non finite entries, a finite difference approximation
        is used instead, provided that it is itself finite. If
        requested, the model is then re-estimated on bootstrap samples.
        Depending on ``self.generateHtml`` and ``self.generatePickle``,
        the results are also written to an HTML and a pickle file.

        :param bootstrap: number of bootstrap resampling used to
               calculate the variance-covariance matrix using
               bootstrapping. If the number is 0, bootstrapping is not
               applied. Default: 0.
        :type bootstrap: int

        :param algorithm: optimization algorithm to use for the
               maximum likelihood estimation.
               Default: Biogeme's Newton's algorithm with simple bounds.
        :type algorithm: function

        :param algoParameters: parameters to transfer to the optimization algorithm
        :type algoParameters: dict

        :param cfsqp_default_bounds: if the user does not provide bounds
              on the parameters, CFSQP assumes that the bounds are
              [-cfsqp_default_bounds, cfsqp_default_bounds]
        :type cfsqp_default_bounds: float

        :param saveIterations: if True, the values of the parameters
                               corresponding to the largest value of
                               the likelihood function are saved in a
                               pickle file at each iteration of the
                               algorithm. Default: False.
        :type saveIterations: bool

        :param file_iterations: name of the file where to save the
                               values of the parameters. Default:
                               '__savedIterations.txt'
        :type file_iterations: str

        :return: object containing the estimation results.
        :rtype: biogeme.results.bioResults

        Example::

            # Create an instance of biogeme
            biogeme  = bio.BIOGEME(database, logprob)

            # Gives a name to the model
            biogeme.modelName = 'mymodel'

            # Estimate the parameters
            results = biogeme.estimate()

        :raises biogemeError: if no expression has been provided for
            the likelihood, or if there is no parameter to estimate.

        """

        if self.loglike is None:
            raise excep.biogemeError(
                'No log likelihood function has been specificed')
        if len(self.freeBetaNames) == 0:
            raise excep.biogemeError(f'There is no parameter to estimate'
                                     f' in the formula: {self.loglike}.')

        self.algorithm = algorithm
        self.algoParameters = algoParameters

        self.cfsqp_default_bounds = cfsqp_default_bounds

        self.calculateInitLikelihood()
        self.saveIterations = saveIterations
        self.file_iterations = f'{file_iterations}'
        self.bestIteration = None

        start_time = datetime.now()
        xstar, optimizationMessages = self.optimize(self.betaInitValues)
        ## Running time of the optimization algorithm
        optimizationMessages['Optimization time'] = datetime.now() - start_time
        ## Information provided by the optimization algorithm after completion.
        self.optimizationMessages = optimizationMessages

        # Function value and derivatives at the solution; the entry of
        # index 2 is the hessian.
        fgHb = self.calculateLikelihoodAndDerivatives(xstar,
                                                      scaled=False,
                                                      hessian=True,
                                                      bhhh=True)
        if not np.isfinite(fgHb[2]).all():
            warning_msg = ('Numerical problems in calculating '
                           'the analytical hessian. Finite differences'
                           ' is tried instead.')
            self.logger.warning(warning_msg)
            finDiffHessian = self.likelihoodFiniteDifferenceHessian(xstar)
            # The finite difference hessian itself must be tested here.
            # Testing the analytical one again would always fail, and
            # the approximation would never be used.
            if np.isfinite(finDiffHessian).all():
                fgHb = fgHb[0], fgHb[1], finDiffHessian, fgHb[3]
            else:
                self.logger.warning(
                    'Numerical problems with finite difference hessian as well.'
                )
        ## numpy array, of size B x K,
        # where
        #        - B is the number of bootstrap iterations
        #        - K is the number of parameters to estimate
        self.bootstrap_results = None
        if bootstrap > 0:
            start_time = datetime.now()

            self.logger.general(
                f'Re-estimate the model {bootstrap} times for bootstrapping')
            self.bootstrap_results = np.empty(shape=[bootstrap, len(xstar)])
            self.logger.temporarySilence()
            try:
                for b in range(bootstrap):
                    # Each re-estimation uses a sample drawn with
                    # replacement, and starts from the estimates xstar.
                    # NOTE(review): the data set in self.theC is not
                    # restored to the original sample after the loop --
                    # confirm that this is intended.
                    if self.database.isPanel():
                        sample = self.database.sampleIndividualMapWithReplacement()
                        self.theC.setDataMap(sample)
                    else:
                        sample = self.database.sampleWithReplacement()
                        self.theC.setData(sample)
                    x_br, _ = self.optimize(xstar)
                    self.bootstrap_results[b] = x_br

                ## Time needed to generate the bootstrap results
                self.bootstrap_time = datetime.now() - start_time
            finally:
                # The logger must not stay silent if a re-estimation fails
                self.logger.resume()
        rawResults = res.rawResults(self,
                                    xstar,
                                    fgHb,
                                    bootstrap=self.bootstrap_results)
        r = res.bioResults(rawResults)
        if self.generateHtml:
            r.writeHtml()
        if self.generatePickle:
            r.writePickle()
        return r
예제 #7
0
def scenario(scale):
    """Calculate the public transportation revenues for a price scenario.

    The marginal cost of public transportation is multiplied by
    ``scale``. The probability of choosing public transportation
    (alternative 0 of the nested logit model) is then simulated using
    the estimates stored in ``01nestedEstimation.pickle``, and the
    weighted revenues are summed over the rows of the simulation
    output. The program is terminated with ``sys.exit`` if the pickle
    file does not exist.

    :param scale: price multiplier.
    :type scale: float

    :return: simulated revenues
    :rtype: float
    """
    # Price of public transportation: the only quantity affected by scale
    pt_cost = MarginalCostPT * scale
    pt_cost_scaled = pt_cost / 10

    # Parameters of the model, identical in all scenarios. ASC_PT is the
    # only one declared with a last argument equal to 1.
    asc_car = Beta('ASC_CAR', 0, None, None, 0)
    asc_pt = Beta('ASC_PT', 0, None, None, 1)
    asc_sm = Beta('ASC_SM', 0, None, None, 0)
    beta_time_fulltime = Beta('BETA_TIME_FULLTIME', 0, None, None, 0)
    beta_time_other = Beta('BETA_TIME_OTHER', 0, None, None, 0)
    beta_dist_male = Beta('BETA_DIST_MALE', 0, None, None, 0)
    beta_dist_female = Beta('BETA_DIST_FEMALE', 0, None, None, 0)
    beta_dist_unreported = Beta('BETA_DIST_UNREPORTED', 0, None, None, 0)
    beta_cost = Beta('BETA_COST', 0, None, None, 0)

    # Utility functions, indexed by the number of the alternative
    utility_pt = (asc_pt + beta_time_fulltime * TimePT_scaled * fulltime +
                  beta_time_other * TimePT_scaled * notfulltime +
                  beta_cost * pt_cost_scaled)
    utility_car = (asc_car +
                   beta_time_fulltime * TimeCar_scaled * fulltime +
                   beta_time_other * TimeCar_scaled * notfulltime +
                   beta_cost * CostCarCHF_scaled)
    utility_sm = (asc_sm +
                  beta_dist_male * distance_km_scaled * male +
                  beta_dist_female * distance_km_scaled * female +
                  beta_dist_unreported * distance_km_scaled * unreportedGender)
    utilities = {0: utility_pt, 1: utility_car, 2: utility_sm}

    # Nests: each one is a pair (nest parameter, list of alternatives)
    mu_nocar = Beta('MU_NOCAR', 1.0, 1.0, None, 0)
    nest_structure = ((1.0, [1]), (mu_nocar, [0, 2]))
    choice_prob_pt = models.nested(utilities, None, nest_structure, 0)

    formulas = {
        'weight': normalizedWeight,
        'Revenue public transportation': choice_prob_pt * pt_cost
    }
    simulator = bio.BIOGEME(database, formulas)
    simulator.modelName = '02nestedPlot'

    # The estimates are read from the file produced by the estimation script
    try:
        estimation = res.bioResults(pickleFile='01nestedEstimation.pickle')
    except FileNotFoundError:
        sys.exit(
            'Run first the script 01nestedEstimation.py in order to generate '
            'the file 01nestedEstimation.pickle.')

    # Simulation
    simulated = simulator.simulate(estimation.getBetaValues())

    # Weighted sum, over all individuals, of the generated revenues
    revenue_column = simulated['Revenue public transportation']
    weight_column = simulated['weight']
    return (revenue_column * weight_column).sum()
예제 #8
0
# Utility functions of alternatives 2 and 3. They share the coefficient
# B_TIME_RND for travel time and B_COST for cost.
V2 = ASC_SM + \
     B_TIME_RND * SM_TT_SCALED + \
     B_COST * SM_COST_SCALED
V3 = ASC_CAR + \
     B_TIME_RND * CAR_TT_SCALED + \
     B_COST * CAR_CO_SCALED

# Associate utility functions with the numbering of alternatives
V = {1: V1, 2: V2, 3: V3}

# Associate the availability conditions with the alternatives
av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

# The estimation results are read from the pickle file.
# The file is not created here: if it is missing, the user is told which
# script generates it and the program stops.
try:
    results = res.bioResults(pickleFile='05normalMixture.pickle')
except FileNotFoundError:
    print(
        'Run first the script 05normalMixture.py in order to generate the file '
        '05normalMixture.pickle.')
    # NOTE(review): sys.exit() without argument terminates with exit
    # status 0, although this is an error path.
    sys.exit()

# Conditional to B_TIME_RND, we have a logit model (called the kernel)
prob = models.logit(V, av, CHOICE)

# We calculate the integration error. Note that this formula assumes
# independent draws, and is not valid for Halton or antithetic draws.
numberOfDraws = 100000
# Monte-Carlo approximations of the integral of the kernel and of the
# integral of its square
integral = MonteCarlo(prob)
integralSquare = MonteCarlo(prob * prob)
# Variance of the kernel: mean of the squares minus square of the mean
variance = integralSquare - integral * integral
# Associate the availability conditions with the alternatives
av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

# Definition of nests:
# each alpha dictionary associates a value with every alternative
# (presumably its degree of membership to the nest -- confirm against the
# documentation of models.logcnl_avail).
alpha_existing = {1: ALPHA_EXISTING, 2: 0.0, 3: 1.0}

alpha_public = {1: ALPHA_PUBLIC, 2: 1.0, 3: 0.0}

# Each nest is a pair (nest parameter, alpha dictionary)
nest_existing = MU_EXISTING, alpha_existing
nest_public = MU_PUBLIC, alpha_public
nests = nest_existing, nest_public
logprob = models.logcnl_avail(V, av, nests, CHOICE)
biogeme = bio.BIOGEME(database, logprob)

# Instead of estimating the parameters, read the estimation
# results from the pickle file.
results = res.bioResults(pickleFile='11cnl.pickle')
# NOTE(review): 'Estimaton' in the printed label is a typo for 'Estimation'.
print("Estimaton results: ", results)

# The choice model is a cross-nested logit, with availability conditions
prob1 = models.cnl_avail(V, av, nests, 1)
prob2 = models.cnl_avail(V, av, nests, 2)
prob3 = models.cnl_avail(V, av, nests, 3)

# Elasticity of each probability with respect to the travel time of the
# same alternative, in the form (dP/dx) * x / P. Derive presumably returns
# the derivative with respect to the named variable -- confirm.
genelas1 = Derive(prob1, 'TRAIN_TT') * TRAIN_TT / prob1
genelas2 = Derive(prob2, 'SM_TT') * SM_TT / prob2
genelas3 = Derive(prob3, 'CAR_TT') * CAR_TT / prob3
simulate = {
    'Prob. train': prob1,
    'Prob. Swissmetro': prob2,
    'Prob. car': prob3,