Ejemplo n.º 1
0
    def match_variable_with_coefficient_names(self, coefnames, varnames):
        """Fill self.coefmap with indices into self.coefficient_names.

        'coefnames' and 'varnames' are parallel string arrays: the i-th element
        of 'coefnames' is the coefficient that belongs to the i-th element of
        'varnames'. For each of the self.n variables, every position at which
        its alias occurs in 'varnames' is looked up; the coefficient name at
        that position is then located in self.coefficient_names and the index
        found is written to self.coefmap at
        (equation, variable, submodel, <other dimensions...>).

        Raises StandardError if a variable alias is not found in 'varnames',
        if it is found more often than
        nequations * nsubmodels * max(1, sum(other dimensions)) times, or if
        a coefficient name is not found in self.coefficient_names.
        """
        ndisteqs = self.nequations()

        for ivar in range(self.n):
            matches = ematch(varnames, self.variable_names[ivar].get_alias())
            nmatches = matches.size
            max_matches = ndisteqs * self.nsubmodels * max(1, sum(self.get_other_ndim()))
            if (nmatches > max_matches) or (nmatches == 0):
                raise StandardError("Method match_variable_with_coefficient_names: something wrong with variable names.")
            for spec_idx in matches:  # iterate over matches of variables
                v_matches = ematch(self.coefficient_names, coefnames[spec_idx])
                if v_matches.size == 0:
                    raise StandardError("Method match_variable_with_coefficient_names: Mismatch in coefficient and variable names.")
                for coef_idx in v_matches:  # iterate over matches in coefficient class
                    # With several submodels, only pair entries that belong to the same submodel.
                    same_submodel = (self.nsubmodels == 1) or (
                        self.specification.get_submodels()[spec_idx] ==
                        self.coefficients.get_submodels()[coef_idx])
                    if not same_submodel:
                        continue
                    submidx = 0
                    if self.nsubmodels > 1:
                        submidx = self.submodels_mapping[self.specification.get_submodels()[spec_idx]]
                    if len(self.specification.get_equations()) > 1:
                        if len(self._equation_index_mapping.keys()) > 0:
                            eqidx = self._equation_index_mapping[self.specification.get_equations()[spec_idx]]
                        else:
                            # No explicit mapping: equation ids are taken as 1-based positions.
                            eqidx = int(self.specification.get_equations()[spec_idx] - 1)
                    else:
                        # No per-entry equations: the coefficient applies to every
                        # row along the first axis of coefmap.
                        eqidx = list(range(self.coefmap.shape[0]))
                    coefmap_index = [eqidx, ivar, submidx]
                    for dimname in self.other_dimensions_values.keys():
                        idx = self.other_dimensions_mapping[dimname][self.specification.get_other_field(dimname)[spec_idx]]
                        coefmap_index.append(idx)
                    self.coefmap[tuple(coefmap_index)] = coef_idx
Ejemplo n.º 2
0
def get_constants(specification):
    """Return the distinct coefficient names attached to constant terms.

    The variable names of 'specification' are matched against the module-level
    'constant_string' (via ematch); the coefficient names at the matching
    positions are reduced with get_distinct_names and returned.
    """
    names_of_variables = asarray(specification.get_variable_names())
    constant_positions = ematch(names_of_variables, constant_string)
    # NOTE: an earlier version restricted the matches to equations < 1 and
    # raised CoefConstantsLengthException when more than one name was found;
    # both checks are disabled.
    constant_coefficients = specification.get_coefficient_names()[constant_positions]
    return get_distinct_names(constant_coefficients)
Ejemplo n.º 3
0
 def delete(self, variables):
     """Delete given variables from specification.

     'variables' is an iterable of variable aliases. Every entry of
     self.variables whose alias matches one of them (via ematch) is dropped;
     aliases that match nothing are ignored. The remaining positions are
     passed to self.do_shrink together with the array of all aliases.
     """
     variables = tuple(variables)
     # A list comprehension (not map) so that asarray gets a real sequence
     # on Python 3 as well.
     variable_names = asarray([var.get_alias() for var in self.variables])
     nvariables = variable_names.size
     # Plain 'bool' instead of the 'bool8' alias, which newer numpy releases dropped.
     will_not_delete = array(nvariables * [True], dtype=bool)
     for var in variables:
         idx = ematch(variable_names, var)
         if idx.size > 0:
             will_not_delete[idx] = False
     self.do_shrink(variable_names, where(will_not_delete)[0])
Ejemplo n.º 4
0
 def shrink(self, variables):
     """Shrink all arrays of class attributes to those elements that correspond to given variables.

     'variables' is an iterable of variable aliases. For each alias that
     matches (via ematch) at least one entry of self.variables, the position
     of its first match is kept, in the order of 'variables'; aliases without
     a match are ignored. The collected positions are passed to
     self.do_shrink together with the array of all aliases.
     """
     variables = tuple(variables)
     # A list comprehension (not map) so that asarray gets a real sequence
     # on Python 3 as well.
     variable_names = asarray([var.get_alias() for var in self.variables])
     idx_list = []
     for var in variables:
         idx = ematch(variable_names, var)
         if idx.size > 0:
             idx_list.append(idx[0])
     # Force an integer dtype: an empty list would otherwise become a float
     # array, which numpy does not accept as an index.
     idx_array = asarray(idx_list, dtype=int)
     self.do_shrink(variable_names, idx_array)
    def truncate_coefficients(self, coefficients):
        """Leave only that part of coefficients that corresponds to specification.

        For every distinct coefficient name of self.specification, the
        positions at which it occurs among the names of 'coefficients' are
        collected (in specification order) and passed to
        coefficients.copy_and_truncate, whose result is returned. If the
        specification has no coefficient names, an empty index array is used.
        """
        specnames = self.specification.get_distinct_coefficient_names()
        if specnames.size <= 0:
            return coefficients.copy_and_truncate(array([], dtype='int32'))
        coefnames = coefficients.get_names()
        index_list = []

        for specname in specnames:
            index_list.extend(ematch(coefnames, specname))
        # Same dtype as the empty case above; a 16-bit index would not be able
        # to address positions beyond 32767.
        return coefficients.copy_and_truncate(array(index_list, dtype='int32'))
Ejemplo n.º 6
0
    def truncate_coefficients(self, coefficients):
        """Leave only that part of coefficients that corresponds to specification.

        For every distinct coefficient name of self.specification, the
        positions at which it occurs among the names of 'coefficients' are
        collected (in specification order) and passed to
        coefficients.copy_and_truncate, whose result is returned. If the
        specification has no coefficient names, an empty index array is used.
        """
        specnames = self.specification.get_distinct_coefficient_names()
        if specnames.size <= 0:
            return coefficients.copy_and_truncate(array([], dtype='int32'))
        coefnames = coefficients.get_names()
        index_list = []

        for specname in specnames:
            index_list.extend(ematch(coefnames, specname))
        # NOTE: keeping reserved names (those starting with '__') is disabled:
        # don't remove reserved names, i.e. starting with '__'
        #idx = where(map(lambda x: x.startswith('__'), coefnames))[0]
        #[index_list.append(i) for i in idx if i not in index_list]
        # Same dtype as the empty case above; a 16-bit index would not be able
        # to address positions beyond 32767.
        return coefficients.copy_and_truncate(array(index_list, dtype='int32'))
Ejemplo n.º 7
0
 def sample_values(self,
                   distribution=None,
                   distribution_dictionary=None,
                   **kwargs):
     """
     Return a copy of self, where values are sampled from given distribution(s).

     If 'distribution' is 'normal', all coefficients are sampled from normal distribution
     (see docstring for sample_values_from_normal_distribution).
     If 'distribution' is 'uniform', all coefficients are sampled from uniform distribution
     (see docstring for sample_values_from_uniform_distribution).
     In both cases, kwargs are passed to the appropriate method.

     If 'distribution' is None, argument 'distribution_dictionary' must be given.
     This dictionary contains arguments for sampling different coefficients using different distributions.
     Keys of this dictionary are coefficient names, values are again dictionaries. These must have a
     key 'distribution' which is either 'normal' or 'uniform'. An optional entry 'parameters' contains
     a dictionary with keyword arguments passed to either sample_one_value_from_normal_distribution (multiplicator) or
     sample_one_value_from_uniform_distribution (a, b, center_around_value). See example in test_sample_coefficients_mixed_distr.
     Entries that name an unknown coefficient, lack the 'distribution' key or ask for an
     unsupported distribution are skipped with a warning.

     Raises TypeError if 'distribution' is None and 'distribution_dictionary' is not a dict,
     and ValueError if 'distribution' is anything other than 'normal', 'uniform' or None.
     """
     if distribution == 'normal':
         logger.log_status(
             'Sampling coefficient values from normal distribution.')
         return self.sample_values_from_normal_distribution(**kwargs)
     if distribution == 'uniform':
         logger.log_status(
             'Sampling coefficient values from uniform distribution.')
         return self.sample_values_from_uniform_distribution(**kwargs)
     if distribution is not None:
         raise ValueError("Sampling from %s distribution not implemented." %
                          distribution)

     if not isinstance(distribution_dictionary, dict):
         raise TypeError("Either argument 'distribution' or argument 'distribution_dictionary' must be not None.")
     # Every coefficient can have different distribution
     new_coef = self.copy_and_truncate(arange(self.size()))
     for name, args in distribution_dictionary.items():
         idx = ematch(self.get_names(), name)
         if idx.size <= 0:
             logger.log_warning(
                 'Coefficient %s not found. Sampling for this coefficient ignored.'
                 % name)
             continue
         if 'distribution' not in args:
             logger.log_warning(
                 "The sampling dictionary for coefficient %s must contain the entry 'distribution'. Sampling for this coefficient ignored."
                 % name)
             continue
         pars = args.get('parameters', {})
         if args['distribution'] == 'normal':
             new_coef.values[idx] = self.sample_one_value_from_normal_distribution(
                 idx, **pars)
         elif args['distribution'] == 'uniform':
             new_coef.values[idx] = self.sample_one_value_from_uniform_distribution(
                 idx, **pars)
         else:
             # The message is formatted here, like the other warnings above,
             # rather than handing the tuple to the logger as a second argument.
             logger.log_warning(
                 "Sampling from %s distribution not implemented. Sampling for %s ignored."
                 % (args['distribution'], name))
     return new_coef
Ejemplo n.º 8
0
 def get_values_of_one_coefficient(self, name):
     """Return the values stored for the coefficient called 'name'.

     The result is the array of self.get_values() restricted to the positions
     at which ematch finds 'name' among self.get_names().
     """
     positions = ematch(self.get_names(), name)
     all_values = array(self.get_values())
     return all_values[positions]
Ejemplo n.º 9
0
    def run(self, data, upc_sequence, resources):
        """
        Estimate a choice model with biogeme and return the results.

        'data' is of shape (nobservations, nchoices, nvariables).
        'upc_sequence' is not used in this method.
        'resources' is a dictionary-like object. It must contain the entries
        "chosen_choice" and "specified_coefficients"; the optional entries
        "skip_generating_model_file" and "biogeme_model_name" control whether
        a model file is created via create_model_file or an existing model
        name is used.

        Returns a dictionary with the entries "estimators", "standard_errors",
        "other_measures" (t statistics and p values) and "other_info"
        (likelihood-based summary measures). Positions of coefficients that
        biogeme reports as fixed keep the value 0 in the returned arrays.
        """

        nobs, alts, nvars = data.shape
        if resources.get("skip_generating_model_file", False):
            model_name = resources.get("biogeme_model_name", "default")
        else:
            model_name = create_model_file(alts, resources)

        choice_matrix = resources[
            "chosen_choice"]  # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        # 1-based column index of the non-zero entries
        # (assumes exactly one chosen alternative per observation - TODO confirm).
        chosen_choice = where(choice_matrix)[1] + 1
        # flatten data into 2d
        var_names = resources[
            "specified_coefficients"].get_variable_names_from_alt()

        # Variables named "constant" are left out of the data handed to biogeme.
        index_of_non_constants = []
        for i in range(nvars):
            if not (var_names[i] == "constant"):
                index_of_non_constants.append(i)

        index_of_non_constants = array(index_of_non_constants)
        nvars_without_const = index_of_non_constants.size
        # One column per (variable, alternative) pair plus a last column for the choice.
        data_for_biogeme = zeros((nobs, alts * nvars_without_const + 1),
                                 dtype=float64)

        # Column names have the form <variable name>_<1-based alternative number>.
        biogeme_var_names = []
        for ivar in range(nvars_without_const):
            for ialt in range(alts):
                biogeme_var_names.append(
                    var_names[index_of_non_constants[ivar]] + "_" +
                    str(ialt + 1))
                data_for_biogeme[:, ivar * alts +
                                 ialt] = data[:, ialt,
                                              index_of_non_constants[ivar]]

        data_for_biogeme[:, alts * nvars_without_const] = chosen_choice
        a_ptr = getpointer(data_for_biogeme)

        #
        # Prepare data headers
        #

        ncols = data_for_biogeme.shape[1]
        headers = biogeme.vectorStr(ncols)

        for i in range(ncols - 1):
            headers[i] = biogeme_var_names[i]
        headers[ncols - 1] = "choice"

        #
        # Define variables for the results
        #

        estimationResults = biogeme.patPythonResults()
        #
        # Invoke biogeme
        #

        biogemeObject = biogeme.patBiogemeScripting()
        biogemeObject.estimate(model_name, a_ptr, nobs, ncols, headers,
                               estimationResults)

        #
        # Use the results
        #

        print " timeStamp: ", estimationResults.getTimeStamp()

        print " version: ", estimationResults.getVersion()

        print " description: ", estimationResults.getDescription()

        print " model: ", estimationResults.getModel()

        print " drawsType: ", estimationResults.getDrawsType()

        print " numberOfDraws: ", estimationResults.numberOfDraws

        print " numberOfParameters: ", estimationResults.numberOfParameters

        print " numberOfObservations: ", estimationResults.numberOfObservations

        print " numberOfIndividuals: ", estimationResults.numberOfIndividuals

        print " nullLogLikelihood: ", estimationResults.nullLoglikelihood

        print " initLoglikelihood: ", estimationResults.initLoglikelihood

        print " finalLoglikelihood: ", estimationResults.finalLoglikelihood

        print " likelihoodRatioTest: ", estimationResults.likelihoodRatioTest

        print " rhoSquare: ", estimationResults.rhoSquare

        print " rhoBarSquare: ", estimationResults.rhoBarSquare

        print " finalGradientNorm: ", estimationResults.finalGradientNorm

        print " varianceCovariance: ", estimationResults.getVarianceCovariance(
        )
        print "Parameters\tEstimate\tStdErr\ttTest\tpValue"

        # Result arrays are aligned with the coefficient names of the specification.
        coef_names = resources[
            "specified_coefficients"].get_coefficient_names_from_alt()
        est_values = zeros(coef_names.size, dtype=float32)
        std_errors = zeros(coef_names.size, dtype=float32)
        tstat = zeros(coef_names.size, dtype=float32)
        pvalues = zeros(coef_names.size, dtype=float32)

        for i in range(estimationResults.totalNumberOfParameters):
            if (estimationResults.getFixed(i)):
                print estimationResults.getParamName(i), " is fixed"
            else:
                name = estimationResults.getParamName(i)
                # First position of the biogeme parameter name among the coefficient
                # names (raises IndexError if there is no match).
                idx = ematch(coef_names, name)[0]
                print name , '\t',\
                      estimationResults.getEstimate(i), '\t',\
                      estimationResults.getStdErrRobust(i),'\t',\
                      estimationResults.getTTestRobust(i),'\t',\
                      estimationResults.getPValueRobust(i)
                #print "Check... " , estimationResults.getEstimate(estimationResults.getParamName(i)) ;
                est_values[idx] = estimationResults.getEstimate(i)
                std_errors[idx] = estimationResults.getStdErrRobust(i)
                tstat[idx] = estimationResults.getTTestRobust(i)
                pvalues[idx] = estimationResults.getPValueRobust(i)
        logger.log_status("Biogeme model: ", model_name)
        return {
            "estimators": est_values,
            "standard_errors": std_errors,
            "other_measures": {
                "t_statistic": tstat,
                "p_values": pvalues
            },
            "other_info": {
                "ll_ratio_index":
                1 - (estimationResults.finalLoglikelihood /
                     estimationResults.nullLoglikelihood),
                "ll_ratio_test_statistics":
                estimationResults.likelihoodRatioTest,
                "nobs":
                estimationResults.numberOfObservations,
                "Rho-Square":
                estimationResults.rhoSquare,
                "Rho-bar-Square":
                estimationResults.rhoBarSquare
            }
        }
Ejemplo n.º 10
0
    def estimate_dcm(self, data):
        """Estimate the model coefficients by minimizing the negative log-likelihood with BFGS.

        'data' is of shape (nobs, alts, nvars, M). The parameter vector has
        nvars + M entries: nvars coefficients followed by M logsum parameters
        (named '__logsum_<nest>' in the output).

        The following entries of self.resources are used:
        "chosen_choice" (required) - matrix (nobs x alts) of 0's and 1's;
        "coefficient_names" - names of the first nvars parameters;
        "fixed_values" - tuple (names, values) of coefficients that are not estimated;
        "starting_values" - dictionary name -> value or name -> (value, estimate_flag);
        "bfgs_epsilon", "bfgs_approximate_second_derivative" - optimizer settings.

        Returns a dictionary with the entries "estimators", "coefficient_names",
        "standard_errors", "other_measures" (t statistics) and "other_info"
        (likelihood ratio measures, degrees of freedom, number of observations).
        """
        nobs, alts, nvars, M = data.shape
        self.M = M
        depm = self.resources[
            "chosen_choice"]  # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        tags = ["estimate", "result"]
        vl = 2
        # Logging options shared by the result report below.
        log_options = {"tags": tags, "verbosity_level": vl}
        coef_names = self.resources.get("coefficient_names", None)
        nest_numbers = self.get_nest_numbers()

        # Plain 'bool' instead of the 'bool8' alias, which newer numpy releases dropped.
        index_of_fixed_values = zeros(nvars + M, dtype=bool)
        fixed_coefs, fixed_values = self.resources.get("fixed_values",
                                                       (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values[get_indices_of_matched_items(
                coef_names, fixed_coefs)] = True

        # Null model: all coefficients 0 (except fixed ones), logsum parameters
        # at the upper end of range_mu.
        beta = zeros(nvars + M).astype(float32)
        beta[-M:] = self.range_mu[1]
        beta[index_of_fixed_values] = fixed_values.astype(beta.dtype)
        l_0 = self.nl_loglikelihood(beta, data, depm)

        # Apply user-given starting values; a (value, flag) pair additionally
        # says whether the parameter is to be estimated.
        ls_idx = arange(nvars, nvars + M)
        for name, sv in self.resources.get("starting_values", {}).items():
            est = True
            if isinstance(sv, (tuple, list)):
                est = sv[1]
                sv = sv[0]
            if name.startswith('__logsum_'):
                # The text after the prefix is the nest number.
                if nest_numbers is not None:
                    idx = ls_idx[where(nest_numbers == int(name[9:]))[0]]
                else:
                    idx = array([ls_idx[int(name[9:]) - 1]])
            else:
                idx = ematch(coef_names, name)
            beta[idx] = sv
            index_of_fixed_values[idx] = not (est)

        # From here on both index sets are integer position arrays.
        index_of_not_fixed_values = where(
            logical_not(index_of_fixed_values))[0]
        index_of_fixed_values = where(index_of_fixed_values)[0]

        # NOTE(review): 'bounds' is computed but not passed to fmin_bfgs below -
        # presumably intended for a bounded optimizer; confirm before removing.
        bounds = index_of_not_fixed_values.size * [(None, None)]
        j = 0
        for i in range(nvars + M - 1, nvars - 1, -1):
            if i in index_of_not_fixed_values:
                bounds[index_of_not_fixed_values.size - j - 1] = self.range_mu
                j += 1

        logger.start_block('BFGS procedure')
        bfgs_result = fmin_bfgs(
            self.minus_nl_loglikelihood,
            beta[index_of_not_fixed_values],
            args=(data, depm, beta[index_of_fixed_values],
                  index_of_not_fixed_values, index_of_fixed_values),
            full_output=True,
            disp=True,
            epsilon=self.resources.get('bfgs_epsilon', self._epsilon),
        )

        logger.end_block()
        beta[index_of_not_fixed_values] = bfgs_result[0].astype(beta.dtype)
        se = zeros(nvars + M)
        tvalues = zeros(nvars + M)
        mingrad = bfgs_result[2]

        # Standard errors come either from the inverse Hessian returned by BFGS
        # or from a separately approximated second derivative.
        if not self.resources.get('bfgs_approximate_second_derivative',
                                  self._approximate_second_derivative):
            inv_hessian = bfgs_result[3]
            se[index_of_not_fixed_values] = sqrt(diagonal(inv_hessian))
        else:
            sec_der = approximate_second_derivative(
                self.minus_nl_loglikelihood,
                beta[index_of_not_fixed_values],
                args=(data, depm, beta[index_of_fixed_values],
                      index_of_not_fixed_values, index_of_fixed_values))
            inv_hessian = 1.0 / sec_der
            se[index_of_not_fixed_values] = sqrt(inv_hessian)

        tvalues[index_of_not_fixed_values] = beta[
            index_of_not_fixed_values] / se[index_of_not_fixed_values]

        l_1 = self.nl_loglikelihood(beta, data, depm)

        ll_ratio = 1 - (l_1 / l_0)
        adj_ll_ratio = 1 - ((l_1 - nvars - M) / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * index_of_not_fixed_values.size - 2 * l_1
        logger.log_status("Akaike's Information Criterion (AIC): ",
                          str(aic), **log_options)
        bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
        logger.log_status("Bayesian Information Criterion (BIC): ",
                          str(bic), **log_options)
        logger.log_status("***********************************************",
                          **log_options)
        logger.log_status('Log-likelihood is:           ', l_1, **log_options)
        logger.log_status('Null Log-likelihood is:      ', l_0, **log_options)
        logger.log_status('Likelihood ratio index:      ', ll_ratio,
                          **log_options)
        logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio,
                          **log_options)
        logger.log_status('Number of observations:      ', nobs,
                          **log_options)
        # NOTE(review): this call carries no tags/verbosity_level, unlike its
        # neighbours - left unchanged; confirm whether that is intended.
        logger.log_status('Suggested |t-value| >        ', sqrt(log(nobs)))
        logger.log_status("-----------------------------------------------",
                          **log_options)
        if coef_names is not None:
            nestn = nest_numbers
            if nestn is None:
                nestn = range(1, M + 1)
            names = concatenate(
                (coef_names, array(['__logsum_%s' % x for x in nestn])))
        else:
            names = [''] * (nvars + M)
        logger.log_status(
            "Coeff_names\testimate\tstd err\t\tt-values\tgradient",
            **log_options)
        # mingrad is indexed by position among the estimated parameters,
        # the other arrays by position in the full parameter vector.
        for i, ipar in enumerate(index_of_not_fixed_values):
            logger.log_status(
                "%10s\t%8g\t%8g\t%8g\t%8g" %
                (names[ipar], beta[ipar], se[ipar], tvalues[ipar], mingrad[i]),
                **log_options)
        logger.log_status('***********************************************',
                          **log_options)
        # NOTE(review): time.clock() is unavailable on recent Python 3 versions;
        # it must match whatever clock was used to set self.start_time.
        logger.log_status('Elapsed time: ',
                          time.clock() - self.start_time,
                          'seconds',
                          **log_options)
        df = nvars + M - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)
        return {
            "estimators": beta,
            "coefficient_names": names,
            "standard_errors": se,
            "other_measures": {
                "t_statistic": tvalues
            },
            "other_info": {
                "p-value": chisqprob(lrts, df),
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "df": df,
                "nobs": nobs
            }
        }