def estimate_dcm(self, data):
        """Estimate choice-model coefficients by iteratively climbing the log-likelihood.

        All coefficients start at zero, except fixed ones, which are set to the
        values given in the resources and never updated. Each iteration obtains
        the gradient from ``self.mnl_gradient`` and the matrix ``h`` from
        ``self.get_hessian`` (per the convergence comment below, the inverse
        Hessian), then moves the free coefficients by ``s * dot(h, g)``. The
        step length ``s`` is halved whenever the log-likelihood does not
        increase, and the search gives up once ``s <= 0.001``.

        Arguments:
            data -- array whose shape unpacks to (nobs, alts, nvars).

        Entries read from ``self.resources``:
            "chosen_choice"     -- (nobs x alts) matrix of 0's and 1's.
            "coefficient_names" -- optional; coefficient names.
            "fixed_values"      -- optional pair (names, values) of fixed coefficients.

        Returns a dictionary with keys "coefficient_names", "estimators",
        "standard_errors", "other_measures", "other_info" and "warnflag",
        or an empty dictionary if ``self.get_hessian`` raises.
        """
        maxiter = self.maximum_iterations  # Maximum iterations allowed
        eps = 0.001  # Convergence criterion for gradient*hessian-inv*gradient
        tags = ["estimate", "result"]
        vl = 2
        nobs, alts, nvars = data.shape
        # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        depm = self.resources["chosen_choice"]
        coef_names = self.resources.get("coefficient_names", None)
        fixed_coefs, fixed_values = self.resources.get("fixed_values", (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values = get_indices_of_matched_items(coef_names, fixed_coefs)
            is_free = ones(nvars, dtype=bool)
            is_free[index_of_fixed_values] = False
            index_of_not_fixed_values = where(is_free)[0]
        else:
            index_of_fixed_values = array([], dtype="int32")
            index_of_not_fixed_values = arange(nvars)
        n_free = index_of_not_fixed_values.size

        b2 = zeros(nvars).astype(float32)
        b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
        se = zeros(nvars).astype(float32)
        tvalues = zeros(nvars).astype(float32)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        l_0 = l_2  # log-likelihood at the starting coefficients
        s = 1
        warnflag = ''

        for it in range(maxiter):
            # NOTE(review): b1 is an alias of b2, not a copy, so the in-place
            # update of b2 below also changes b1. Kept as in the original;
            # confirm whether a snapshot of the previous estimates was intended.
            b1 = b2
            l_1 = l_2
            g = self.mnl_gradient(data, b1, depm, index_of_not_fixed_values)
            try:
                h = self.get_hessian(g)
            except Exception:
                logger.log_warning("Estimation led to singular matrix. No results.",
                                   tags=tags, verbosity_level=vl)
                return {}
            g = g.sum(axis=0)
            c = dot(dot(transpose(g), h), g)
            if c <= eps:
                logger.log_status("Convergence achieved.", tags=tags, verbosity_level=vl)
                break
            d = dot(h, g)
            b2[index_of_not_fixed_values] = (b1[index_of_not_fixed_values] + s * d).astype(b2.dtype)
            l_2 = self.mnl_loglikelihood(data, b2, depm)
            if l_2 <= l_1:
                s = s / 2.0
            if s <= .001:
                warnflag += "Cannot find increase." + "\n"
                break
        else:
            # Reached only when the loop ran out of iterations without a break,
            # so converging on the very last iteration is not misreported.
            warnflag += "Maximum iterations reached without convergence." + "\n"

        se[index_of_not_fixed_values] = self.get_standard_error(h).astype(se.dtype)
        tvalues[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] / se[index_of_not_fixed_values]).astype(tvalues.dtype)
        ll_ratio = 1 - (l_1 / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * n_free - 2 * l_1
        bic = -2 * l_1 + n_free * log(nobs)

        if coef_names is not None:
            names = coef_names
        else:
            names = [''] * n_free

        est = b1
        df = nvars - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)

        result = {"coefficient_names": names,
                  "estimators": est,
                  "standard_errors": se,
                  "other_measures": {"t_statistic": tvalues},
                  "other_info": {"aic": aic,
                                 "bic": bic,
                                 "p-value": chisqprob(lrts, df),
                                 "l_0": l_0,
                                 "l_1": l_1,
                                 "ll_ratio_index": ll_ratio,
                                 "ll_ratio_test_statistics": lrts,
                                 "convergence": c,
                                 "df": df,
                                 "nobs": nobs,
                                 "nvars": nvars,
                                 "nalts": alts,
                                 "iterations": it + 1
                                 },
                  "warnflag": warnflag}
        self.print_results(result)

        return result
    def estimate_dcm(self, data):
        """Estimate coefficients and logsum parameters with bounded L-BFGS-B.

        The parameter vector ``beta`` holds ``nvars`` coefficients followed by
        ``M`` logsum parameters (named '__logsum_<n>' in the output). Free
        entries are optimised by ``fmin_l_bfgs_b`` on
        ``self.minus_nl_loglikelihood`` with a numerically approximated
        gradient; fixed entries keep their given values.

        Arguments:
            data -- array whose shape unpacks to (nobs, alts, nvars, M).

        Entries read from ``self.resources``:
            "chosen_choice"     -- (nobs x alts) matrix of 0's and 1's.
            "coefficient_names" -- optional; coefficient names.
            "fixed_values"      -- optional pair (names, values).
            "starting_values"   -- optional dict name -> value or (value, estimate_flag).
            "bfgs_epsilon"      -- optional step for the numerical gradient.

        Side effects: sets ``self.M`` and writes a result table to the logger.

        Returns a dictionary with keys "estimators", "coefficient_names",
        "standard_errors", "other_measures" and "other_info".
        """
        nobs, alts, nvars, M = data.shape
        self.M = M
        # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        depm = self.resources["chosen_choice"]
        tags = ["estimate", "result"]
        vl = 2
        coef_names = self.resources.get("coefficient_names", None)
        nest_numbers = self.get_nest_numbers()
        n_params = nvars + M

        # Boolean mask while starting values are processed; converted to
        # integer index arrays afterwards.
        index_of_fixed_values = zeros(n_params, dtype=bool)
        fixed_coefs, fixed_values = self.resources.get("fixed_values", (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values[get_indices_of_matched_items(coef_names, fixed_coefs)] = True

        beta = ones(n_params).astype(float32)
        beta[-M:] = self.range_mu[1]
        # NOTE(review): a boolean mask assigns fixed_values in positional order;
        # this presumably assumes fixed_coefs is ordered like coef_names - confirm.
        beta[index_of_fixed_values] = fixed_values.astype(beta.dtype)

        # Null log-likelihood: all coefficients 0, all logsum parameters 1.
        l_0beta = zeros(n_params).astype(float32)
        l_0beta[-M:] = 1
        l_0 = self.nl_loglikelihood(l_0beta, data, depm)

        ls_idx = arange(nvars, n_params)
        for name, sv in self.resources.get("starting_values", {}).iteritems():
            est = True
            if isinstance(sv, (tuple, list)):
                # (value, estimate_flag): a false flag fixes the parameter.
                est = sv[1]
                sv = sv[0]
            if name.startswith('__logsum_'):
                nest_id = int(name[9:])
                if nest_numbers is not None:
                    idx = ls_idx[where(nest_numbers == nest_id)[0]]
                else:
                    idx = array([ls_idx[nest_id - 1]])
            else:
                idx = ematch(coef_names, name)
            beta[idx] = sv
            index_of_fixed_values[idx] = not(est)

        index_of_not_fixed_values = where(logical_not(index_of_fixed_values))[0]
        index_of_fixed_values = where(index_of_fixed_values)[0]
        n_free = index_of_not_fixed_values.size

        # Default box for coefficients; free logsum parameters (which sit at
        # the tail of the free-index array) get self.range_mu instead.
        bounds = n_free * [(-5.0, 5.0)]
        j = 0
        for i in range(n_params - 1, nvars - 1, -1):
            if i in index_of_not_fixed_values:
                bounds[n_free - j - 1] = self.range_mu
                j += 1

        logger.start_block('BFGS procedure')
        bfgs_result = fmin_l_bfgs_b(self.minus_nl_loglikelihood, beta[index_of_not_fixed_values], pgtol=.01,
                                    args=(data, depm, beta[index_of_fixed_values],
                                          index_of_not_fixed_values, index_of_fixed_values),
                                    bounds=bounds, approx_grad=True,
                                    disp=True, epsilon=self.resources.get('bfgs_epsilon', self._epsilon),
                                    )
        logger.end_block()

        beta[index_of_not_fixed_values] = bfgs_result[0].astype(beta.dtype)
        se = zeros(n_params)
        tvalues = zeros(n_params)
        mingrad = bfgs_result[2]['grad']

        # The optimiser result carries no inverse Hessian, so standard errors
        # always come from the approximated second derivative.
        sec_der = approximate_second_derivative(self.minus_nl_loglikelihood, beta[index_of_not_fixed_values],
                                                args=(data, depm, beta[index_of_fixed_values],
                                                      index_of_not_fixed_values, index_of_fixed_values))
        inv_hessian = 1.0 / sec_der
        se[index_of_not_fixed_values] = sqrt(inv_hessian)

        tvalues[index_of_not_fixed_values] = beta[index_of_not_fixed_values] / se[index_of_not_fixed_values]

        l_1 = self.nl_loglikelihood(beta, data, depm)

        ll_ratio = 1 - (l_1 / l_0)
        adj_ll_ratio = 1 - ((l_1 - nvars - M) / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * n_free - 2 * l_1
        logger.log_status("Akaike's Information Criterion (AIC): ", str(aic), tags=tags, verbosity_level=vl)
        bic = -2 * l_1 + n_free * log(nobs)
        logger.log_status("Bayesian Information Criterion (BIC): ", str(bic), tags=tags, verbosity_level=vl)
        logger.log_status("***********************************************", tags=tags, verbosity_level=vl)
        logger.log_status('Log-likelihood is:           ', l_1, tags=tags, verbosity_level=vl)
        logger.log_status('Null Log-likelihood is:      ', l_0, tags=tags, verbosity_level=vl)
        logger.log_status('Likelihood ratio index:      ', ll_ratio, tags=tags, verbosity_level=vl)
        logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio, tags=tags, verbosity_level=vl)
        logger.log_status('Number of observations:      ', nobs, tags=tags, verbosity_level=vl)
        logger.log_status('Suggested |t-value| >        ', sqrt(log(nobs)))
        logger.log_status('WARNING: Standard errors printed below are approximated')
        logger.log_status("-----------------------------------------------", tags=tags, verbosity_level=vl)
        if coef_names is not None:
            nestn = nest_numbers
            if nestn is None:
                nestn = range(1, M + 1)
            names = concatenate((coef_names, array(['__logsum_%s' % x for x in nestn])))
        else:
            names = [''] * n_params
        logger.log_status("Coeff_names\testimate\tstd err\t\tt-values\tgradient", tags=tags, verbosity_level=vl)
        # mingrad is ordered like the free parameters, hence the positional index.
        for pos, idx in enumerate(index_of_not_fixed_values):
            logger.log_status("%10s\t%8g\t%8g\t%8g\t%8g" % (names[idx], beta[idx], se[idx],
                                                            tvalues[idx], mingrad[pos]),
                              tags=tags, verbosity_level=vl)
        logger.log_status('***********************************************', tags=tags, verbosity_level=vl)
        logger.log_status('Elapsed time: ', time.clock() - self.start_time, 'seconds', tags=tags, verbosity_level=vl)
        df = n_params - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)
        return {"estimators": beta, "coefficient_names": names, "standard_errors": se,
                "other_measures": {"t_statistic": tvalues},
                "other_info": {"p-value": chisqprob(lrts, df),
                               "ll_ratio_index": ll_ratio,
                               "ll_ratio_test_statistics": lrts, "df": df, "nobs": nobs}}
    def estimate_dcm(self, data):
        """Estimate choice-model coefficients by iteratively climbing the log-likelihood.

        All coefficients start at zero, except fixed ones, which are set to the
        values given in the resources and never updated. Each iteration obtains
        the gradient from ``self.mnl_gradient`` and the matrix ``h`` from
        ``self.get_hessian`` (per the convergence comment below, the inverse
        Hessian), then moves the free coefficients by ``s * dot(h, g)``. The
        step length ``s`` is halved whenever the log-likelihood does not
        increase, and the search gives up once ``s <= 0.001``.

        Arguments:
            data -- array whose shape unpacks to (nobs, alts, nvars).

        Entries read from ``self.resources``:
            "chosen_choice"     -- (nobs x alts) matrix of 0's and 1's.
            "coefficient_names" -- optional; coefficient names.
            "fixed_values"      -- optional pair (names, values) of fixed coefficients.

        Returns a dictionary with keys "coefficient_names", "estimators",
        "standard_errors", "other_measures", "other_info" and "warnflag",
        or an empty dictionary if ``self.get_hessian`` raises.
        """
        maxiter = self.maximum_iterations  # Maximum iterations allowed
        eps = 0.001  # Convergence criterion for gradient*hessian-inv*gradient
        tags = ["estimate", "result"]
        vl = 2
        nobs, alts, nvars = data.shape
        # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        depm = self.resources["chosen_choice"]
        coef_names = self.resources.get("coefficient_names", None)
        fixed_coefs, fixed_values = self.resources.get("fixed_values",
                                                       (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values = get_indices_of_matched_items(
                coef_names, fixed_coefs)
            is_free = ones(nvars, dtype=bool)
            is_free[index_of_fixed_values] = False
            index_of_not_fixed_values = where(is_free)[0]
        else:
            index_of_fixed_values = array([], dtype="int32")
            index_of_not_fixed_values = arange(nvars)
        n_free = index_of_not_fixed_values.size

        b2 = zeros(nvars).astype(float32)
        b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
        se = zeros(nvars).astype(float32)
        tvalues = zeros(nvars).astype(float32)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        l_0 = l_2  # log-likelihood at the starting coefficients
        s = 1
        warnflag = ''

        for it in range(maxiter):
            # NOTE(review): b1 is an alias of b2, not a copy, so the in-place
            # update of b2 below also changes b1. Kept as in the original;
            # confirm whether a snapshot of the previous estimates was intended.
            b1 = b2
            l_1 = l_2
            g = self.mnl_gradient(data, b1, depm, index_of_not_fixed_values)
            try:
                h = self.get_hessian(g)
            except Exception:
                logger.log_warning(
                    "Estimation led to singular matrix. No results.",
                    tags=tags,
                    verbosity_level=vl)
                return {}
            g = g.sum(axis=0)
            c = dot(dot(transpose(g), h), g)
            if c <= eps:
                logger.log_status("Convergence achieved.",
                                  tags=tags,
                                  verbosity_level=vl)
                break
            d = dot(h, g)
            b2[index_of_not_fixed_values] = (b1[index_of_not_fixed_values] +
                                             s * d).astype(b2.dtype)
            l_2 = self.mnl_loglikelihood(data, b2, depm)
            if l_2 <= l_1:
                s = s / 2.0
            if s <= .001:
                warnflag += "Cannot find increase." + "\n"
                break
        else:
            # Reached only when the loop ran out of iterations without a break,
            # so converging on the very last iteration is not misreported.
            warnflag += "Maximum iterations reached without convergence." + "\n"

        se[index_of_not_fixed_values] = self.get_standard_error(h).astype(
            se.dtype)
        tvalues[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] /
            se[index_of_not_fixed_values]).astype(tvalues.dtype)
        ll_ratio = 1 - (l_1 / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * n_free - 2 * l_1
        bic = -2 * l_1 + n_free * log(nobs)

        if coef_names is not None:
            names = coef_names
        else:
            names = [''] * n_free

        est = b1
        df = nvars - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)

        result = {
            "coefficient_names": names,
            "estimators": est,
            "standard_errors": se,
            "other_measures": {
                "t_statistic": tvalues
            },
            "other_info": {
                "aic": aic,
                "bic": bic,
                "p-value": chisqprob(lrts, df),
                "l_0": l_0,
                "l_1": l_1,
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "convergence": c,
                "df": df,
                "nobs": nobs,
                "nvars": nvars,
                "nalts": alts,
                "iterations": it + 1
            },
            "warnflag": warnflag
        }
        self.print_results(result)

        return result
Example #4
0
    def estimate_dcm(self, data):
        """Estimate choice-model coefficients and log a summary table.

        All coefficients start at zero, except fixed ones, which are set to the
        values given in the resources and never updated. Each iteration obtains
        the gradient from ``self.mnl_gradient`` and the matrix ``h`` from
        ``self.get_hessian`` (per the convergence comment below, the inverse
        Hessian), then moves the free coefficients by ``s * dot(h, g)``. The
        step length ``s`` is halved whenever the log-likelihood does not
        increase, and the search gives up once ``s <= 0.001``.

        Arguments:
            data -- array whose shape unpacks to (nobs, alts, nvars).

        Entries read from ``self.resources``:
            "chosen_choice"     -- (nobs x alts) matrix of 0's and 1's.
            "coefficient_names" -- optional; coefficient names.
            "fixed_values"      -- optional pair (names, values) of fixed coefficients.

        Returns a dictionary with keys "estimators", "standard_errors",
        "other_measures" and "other_info", or an empty dictionary if
        ``self.get_hessian`` raises.
        """
        maxiter = self.maximum_iterations  # Maximum iterations allowed
        eps = 0.001  # Convergence criterion for gradient*hessian-inv*gradient
        tags = ["estimate", "result"]
        vl = 2
        nobs, alts, nvars = data.shape
        # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        depm = self.resources["chosen_choice"]
        coef_names = self.resources.get("coefficient_names", None)
        fixed_coefs, fixed_values = self.resources.get("fixed_values",
                                                       (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values = get_indices_of_matched_items(
                coef_names, fixed_coefs)
            is_free = ones(nvars, dtype=bool)
            is_free[index_of_fixed_values] = False
            index_of_not_fixed_values = where(is_free)[0]
        else:
            index_of_fixed_values = array([], dtype="int32")
            index_of_not_fixed_values = arange(nvars)
        n_free = index_of_not_fixed_values.size

        b2 = zeros(nvars).astype(float32)
        b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
        se = zeros(nvars).astype(float32)
        tvalues = zeros(nvars).astype(float32)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        l_0 = l_2  # log-likelihood at the starting coefficients
        s = 1

        for it in range(maxiter):
            # NOTE(review): b1 is an alias of b2, not a copy, so the in-place
            # update of b2 below also changes b1. Kept as in the original;
            # confirm whether a snapshot of the previous estimates was intended.
            b1 = b2
            l_1 = l_2
            g = self.mnl_gradient(data, b1, depm, index_of_not_fixed_values)
            try:
                h = self.get_hessian(g)
            except Exception:
                logger.log_warning(
                    "Estimation led to singular matrix. No results.",
                    tags=tags,
                    verbosity_level=vl)
                return {}
            g = g.sum(axis=0)
            c = dot(dot(transpose(g), h), g)
            if c <= eps:
                logger.log_status('Convergence achieved.',
                                  tags=tags,
                                  verbosity_level=vl)
                break
            d = dot(h, g)
            b2[index_of_not_fixed_values] = (b1[index_of_not_fixed_values] +
                                             s * d).astype(b2.dtype)
            l_2 = self.mnl_loglikelihood(data, b2, depm)
            if l_2 <= l_1:
                s = s / 2.0
            if s <= .001:
                logger.log_warning('Cannot find increase',
                                   tags=tags,
                                   verbosity_level=vl)
                break
        else:
            # Reached only when the loop ran out of iterations without a break,
            # so converging on the very last iteration is not misreported.
            logger.log_warning(
                'Maximum iterations reached without convergence',
                tags=tags,
                verbosity_level=vl)

        se[index_of_not_fixed_values] = self.get_standard_error(h).astype(
            se.dtype)
        tvalues[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] /
            se[index_of_not_fixed_values]).astype(tvalues.dtype)
        ll_ratio = 1 - (l_1 / l_0)
        adj_ll_ratio = 1 - ((l_1 - nvars) / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * n_free - 2 * l_1
        logger.log_status("Akaike's Information Criterion (AIC): ",
                          str(aic),
                          tags=tags,
                          verbosity_level=vl)
        bic = -2 * l_1 + n_free * log(nobs)
        logger.log_status("Bayesian Information Criterion (BIC): ",
                          str(bic),
                          tags=tags,
                          verbosity_level=vl)

        logger.log_status("Number of Iterations: ",
                          it + 1,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status("***********************************************",
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Log-likelihood is:           ',
                          l_1,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Null Log-likelihood is:      ',
                          l_0,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Likelihood ratio index:      ',
                          ll_ratio,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Adj. likelihood ratio index: ',
                          adj_ll_ratio,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Number of observations:      ',
                          nobs,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Suggested |t-value| >        ', sqrt(log(nobs)))
        logger.log_status('Convergence statistic is:    ',
                          c,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status("-----------------------------------------------",
                          tags=tags,
                          verbosity_level=vl)
        if coef_names is not None:
            names = coef_names
        else:
            names = [''] * n_free
        logger.log_status("Coeff_names\testimate\tstd err\t\tt-values",
                          tags=tags,
                          verbosity_level=vl)
        # Only the estimated (non-fixed) coefficients are listed.
        for i in index_of_not_fixed_values:
            logger.log_status("%10s\t%8g\t%8g\t%8g" %
                              (names[i], b1[i], se[i], tvalues[i]),
                              tags=tags,
                              verbosity_level=vl)
        logger.log_status('***********************************************',
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Elapsed time: ',
                          time.clock() - self.start_time,
                          'seconds',
                          tags=tags,
                          verbosity_level=vl)
        est = b1
        df = nvars - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)
        return {
            "estimators": est,
            "standard_errors": se,
            "other_measures": {
                "t_statistic": tvalues
            },
            "other_info": {
                "p-value": chisqprob(lrts, df),
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "df": df,
                "nobs": nobs
            }
        }
    def estimate_dcm(self, data):
        """Estimate choice-model coefficients and log a summary table.

        All coefficients start at zero, except fixed ones, which are set to the
        values given in the resources and never updated. Each iteration obtains
        the gradient from ``self.mnl_gradient`` and the matrix ``h`` from
        ``self.get_hessian`` (per the convergence comment below, the inverse
        Hessian), then moves the free coefficients by ``s * dot(h, g)``. The
        step length ``s`` is halved whenever the log-likelihood does not
        increase, and the search gives up once ``s <= 0.001``.

        Arguments:
            data -- array whose shape unpacks to (nobs, alts, nvars).

        Entries read from ``self.resources``:
            "chosen_choice"     -- (nobs x alts) matrix of 0's and 1's.
            "coefficient_names" -- optional; coefficient names.
            "fixed_values"      -- optional pair (names, values) of fixed coefficients.

        Returns a dictionary with keys "estimators", "standard_errors",
        "other_measures" and "other_info", or an empty dictionary if
        ``self.get_hessian`` raises.
        """
        maxiter = self.maximum_iterations  # Maximum iterations allowed
        eps = 0.001  # Convergence criterion for gradient*hessian-inv*gradient
        tags = ["estimate", "result"]
        vl = 2
        nobs, alts, nvars = data.shape
        # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        depm = self.resources["chosen_choice"]
        coef_names = self.resources.get("coefficient_names", None)
        fixed_coefs, fixed_values = self.resources.get("fixed_values", (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values = get_indices_of_matched_items(coef_names, fixed_coefs)
            is_free = ones(nvars, dtype=bool)
            is_free[index_of_fixed_values] = False
            index_of_not_fixed_values = where(is_free)[0]
        else:
            index_of_fixed_values = array([], dtype="int32")
            index_of_not_fixed_values = arange(nvars)
        n_free = index_of_not_fixed_values.size

        b2 = zeros(nvars).astype(float32)
        b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
        se = zeros(nvars).astype(float32)
        tvalues = zeros(nvars).astype(float32)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        l_0 = l_2  # log-likelihood at the starting coefficients
        s = 1

        for it in range(maxiter):
            # NOTE(review): b1 is an alias of b2, not a copy, so the in-place
            # update of b2 below also changes b1. Kept as in the original;
            # confirm whether a snapshot of the previous estimates was intended.
            b1 = b2
            l_1 = l_2
            g = self.mnl_gradient(data, b1, depm, index_of_not_fixed_values)
            try:
                h = self.get_hessian(g)
            except Exception:
                logger.log_warning("Estimation led to singular matrix. No results.", tags=tags, verbosity_level=vl)
                return {}
            g = g.sum(axis=0)
            c = dot(dot(transpose(g), h), g)
            if c <= eps:
                logger.log_status('Convergence achieved.', tags=tags, verbosity_level=vl)
                break
            d = dot(h, g)
            b2[index_of_not_fixed_values] = (b1[index_of_not_fixed_values] + s * d).astype(b2.dtype)
            l_2 = self.mnl_loglikelihood(data, b2, depm)
            if l_2 <= l_1:
                s = s / 2.0
            if s <= .001:
                logger.log_warning('Cannot find increase', tags=tags, verbosity_level=vl)
                break
        else:
            # Reached only when the loop ran out of iterations without a break,
            # so converging on the very last iteration is not misreported.
            logger.log_warning('Maximum iterations reached without convergence', tags=tags, verbosity_level=vl)

        se[index_of_not_fixed_values] = self.get_standard_error(h).astype(se.dtype)
        tvalues[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] / se[index_of_not_fixed_values]).astype(tvalues.dtype)
        ll_ratio = 1 - (l_1 / l_0)
        adj_ll_ratio = 1 - ((l_1 - nvars) / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * n_free - 2 * l_1
        logger.log_status("Akaike's Information Criterion (AIC): ", str(aic), tags=tags, verbosity_level=vl)
        bic = -2 * l_1 + n_free * log(nobs)
        logger.log_status("Bayesian Information Criterion (BIC): ", str(bic), tags=tags, verbosity_level=vl)

        logger.log_status("Number of Iterations: ", it + 1, tags=tags, verbosity_level=vl)
        logger.log_status("***********************************************", tags=tags, verbosity_level=vl)
        logger.log_status('Log-likelihood is:           ', l_1, tags=tags, verbosity_level=vl)
        logger.log_status('Null Log-likelihood is:      ', l_0, tags=tags, verbosity_level=vl)
        logger.log_status('Likelihood ratio index:      ', ll_ratio, tags=tags, verbosity_level=vl)
        logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio, tags=tags, verbosity_level=vl)
        logger.log_status('Number of observations:      ', nobs, tags=tags, verbosity_level=vl)
        logger.log_status('Suggested |t-value| >        ', sqrt(log(nobs)))
        logger.log_status('Convergence statistic is:    ', c, tags=tags, verbosity_level=vl)
        logger.log_status("-----------------------------------------------", tags=tags, verbosity_level=vl)
        if coef_names is not None:
            names = coef_names
        else:
            names = [''] * n_free
        logger.log_status("Coeff_names\testimate\tstd err\t\tt-values", tags=tags, verbosity_level=vl)
        # Only the estimated (non-fixed) coefficients are listed.
        for i in index_of_not_fixed_values:
            logger.log_status("%10s\t%8g\t%8g\t%8g" % (names[i], b1[i], se[i], tvalues[i]),
                              tags=tags, verbosity_level=vl)
        logger.log_status('***********************************************', tags=tags, verbosity_level=vl)
        logger.log_status('Elapsed time: ', time.clock() - self.start_time, 'seconds', tags=tags, verbosity_level=vl)
        est = b1
        df = nvars - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)
        return {"estimators": est, "standard_errors": se, "other_measures": {"t_statistic": tvalues},
                "other_info": {"p-value": chisqprob(lrts, df),
                               "ll_ratio_index": ll_ratio,
                               "ll_ratio_test_statistics": lrts, "df": df, "nobs": nobs}}
Example #6
0
    def estimate_dcm(self, data):
        """Estimate coefficients and logsum parameters with unconstrained BFGS.

        The parameter vector ``beta`` holds ``nvars`` coefficients followed by
        ``M`` logsum parameters (named '__logsum_<n>' in the output). Free
        entries are optimised by ``fmin_bfgs`` on
        ``self.minus_nl_loglikelihood``; fixed entries keep their given values.
        Standard errors come either from the inverse Hessian returned by the
        optimiser or from ``approximate_second_derivative``, depending on the
        'bfgs_approximate_second_derivative' resource (default
        ``self._approximate_second_derivative``).

        Arguments:
            data -- array whose shape unpacks to (nobs, alts, nvars, M).

        Entries read from ``self.resources``:
            "chosen_choice"     -- (nobs x alts) matrix of 0's and 1's.
            "coefficient_names" -- optional; coefficient names.
            "fixed_values"      -- optional pair (names, values).
            "starting_values"   -- optional dict name -> value or (value, estimate_flag).
            "bfgs_epsilon"      -- optional step for the numerical gradient.

        Side effects: sets ``self.M`` and writes a result table to the logger.

        Returns a dictionary with keys "estimators", "coefficient_names",
        "standard_errors", "other_measures" and "other_info".
        """
        nobs, alts, nvars, M = data.shape
        self.M = M
        # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        depm = self.resources["chosen_choice"]
        tags = ["estimate", "result"]
        vl = 2
        coef_names = self.resources.get("coefficient_names", None)
        nest_numbers = self.get_nest_numbers()
        n_params = nvars + M

        # Boolean mask while starting values are processed; converted to
        # integer index arrays afterwards.
        index_of_fixed_values = zeros(n_params, dtype=bool)
        fixed_coefs, fixed_values = self.resources.get("fixed_values",
                                                       (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values[get_indices_of_matched_items(
                coef_names, fixed_coefs)] = True

        beta = zeros(n_params).astype(float32)
        beta[-M:] = self.range_mu[1]
        # NOTE(review): a boolean mask assigns fixed_values in positional order;
        # this presumably assumes fixed_coefs is ordered like coef_names - confirm.
        beta[index_of_fixed_values] = fixed_values.astype(beta.dtype)
        # Reference log-likelihood, taken before starting values are applied.
        l_0 = self.nl_loglikelihood(beta, data, depm)

        ls_idx = arange(nvars, n_params)
        for name, sv in self.resources.get("starting_values", {}).iteritems():
            est = True
            if isinstance(sv, (tuple, list)):
                # (value, estimate_flag): a false flag fixes the parameter.
                est = sv[1]
                sv = sv[0]
            if name.startswith('__logsum_'):
                nest_id = int(name[9:])
                if nest_numbers is not None:
                    idx = ls_idx[where(nest_numbers == nest_id)[0]]
                else:
                    idx = array([ls_idx[nest_id - 1]])
            else:
                idx = ematch(coef_names, name)
            beta[idx] = sv
            index_of_fixed_values[idx] = not (est)

        index_of_not_fixed_values = where(
            logical_not(index_of_fixed_values))[0]
        index_of_fixed_values = where(index_of_fixed_values)[0]
        n_free = index_of_not_fixed_values.size

        # fmin_bfgs is unconstrained, so no parameter bounds are built here.
        logger.start_block('BFGS procedure')
        bfgs_result = fmin_bfgs(
            self.minus_nl_loglikelihood,
            beta[index_of_not_fixed_values],
            args=(data, depm, beta[index_of_fixed_values],
                  index_of_not_fixed_values, index_of_fixed_values),
            full_output=True,
            disp=True,
            epsilon=self.resources.get('bfgs_epsilon', self._epsilon),
        )
        logger.end_block()

        beta[index_of_not_fixed_values] = bfgs_result[0].astype(beta.dtype)
        se = zeros(n_params)
        tvalues = zeros(n_params)
        mingrad = bfgs_result[2]  # gradient at the optimum (full_output layout)

        if not self.resources.get('bfgs_approximate_second_derivative',
                                  self._approximate_second_derivative):
            inv_hessian = bfgs_result[3]
            se[index_of_not_fixed_values] = sqrt(diagonal(inv_hessian))
        else:
            sec_der = approximate_second_derivative(
                self.minus_nl_loglikelihood,
                beta[index_of_not_fixed_values],
                args=(data, depm, beta[index_of_fixed_values],
                      index_of_not_fixed_values, index_of_fixed_values))
            inv_hessian = 1.0 / sec_der
            se[index_of_not_fixed_values] = sqrt(inv_hessian)

        tvalues[index_of_not_fixed_values] = beta[
            index_of_not_fixed_values] / se[index_of_not_fixed_values]

        l_1 = self.nl_loglikelihood(beta, data, depm)

        ll_ratio = 1 - (l_1 / l_0)
        adj_ll_ratio = 1 - ((l_1 - nvars - M) / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * n_free - 2 * l_1
        logger.log_status("Akaike's Information Criterion (AIC): ",
                          str(aic),
                          tags=tags,
                          verbosity_level=vl)
        bic = -2 * l_1 + n_free * log(nobs)
        logger.log_status("Bayesian Information Criterion (BIC): ",
                          str(bic),
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status("***********************************************",
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Log-likelihood is:           ',
                          l_1,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Null Log-likelihood is:      ',
                          l_0,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Likelihood ratio index:      ',
                          ll_ratio,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Adj. likelihood ratio index: ',
                          adj_ll_ratio,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Number of observations:      ',
                          nobs,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Suggested |t-value| >        ', sqrt(log(nobs)))
        logger.log_status("-----------------------------------------------",
                          tags=tags,
                          verbosity_level=vl)
        if coef_names is not None:
            nestn = nest_numbers
            if nestn is None:
                nestn = range(1, M + 1)
            names = concatenate(
                (coef_names, array(['__logsum_%s' % x for x in nestn])))
        else:
            names = [''] * n_params
        logger.log_status(
            "Coeff_names\testimate\tstd err\t\tt-values\tgradient",
            tags=tags,
            verbosity_level=vl)
        # mingrad is ordered like the free parameters, hence the positional index.
        for pos, idx in enumerate(index_of_not_fixed_values):
            logger.log_status(
                "%10s\t%8g\t%8g\t%8g\t%8g" %
                (names[idx], beta[idx], se[idx], tvalues[idx], mingrad[pos]),
                tags=tags,
                verbosity_level=vl)
        logger.log_status('***********************************************',
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Elapsed time: ',
                          time.clock() - self.start_time,
                          'seconds',
                          tags=tags,
                          verbosity_level=vl)
        df = n_params - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)
        return {
            "estimators": beta,
            "coefficient_names": names,
            "standard_errors": se,
            "other_measures": {
                "t_statistic": tvalues
            },
            "other_info": {
                "p-value": chisqprob(lrts, df),
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "df": df,
                "nobs": nobs
            }
        }