Example #2
    def test_sample_coefficients_mixed_distr(self):
        """ 2 coefficients are sampled from different distributions, one stays the same. """

        from opus_core.third_party.pstat import chisqprob
        coef_values = array([1, 0.5, 0.2], dtype="float32")
        se = array([0, 0.02, 0.001], dtype="float32")
        coef = Coefficients(names=array(
            ["coef_uniform", "coef_const", "coef_normal"]),
                            values=coef_values,
                            standard_errors=se)
        sampling_dict = {
            "coef_uniform": {
                "distribution": "uniform",
                "parameters": {
                    "a": 1,
                    "b": 1
                }
            },
            "coef_normal": {
                "distribution": "normal",
                "parameters": {
                    "multiplicator": 10
                }
            }
        }
        # for coefficients 1 and 3, run a one-sided Chi^2 test
        expected_values = coef_values
        TSU = 0
        TSN = 0
        df = 9
        significance_level = 0.05
        for j in range(df + 1):
            new_coef = coef.sample_values(
                distribution_dictionary=sampling_dict)
            values = new_coef.get_values()
            TSU += ((values[0] - expected_values[0])**2) / expected_values[0]
            TSN += ((values[2] - expected_values[2]) / (se[2] * 10))**2
            self.assertEqual(
                ma.allclose(new_coef.get_values()[1], expected_values[1]),
                True)

        prob = chisqprob(TSU, df)
        if (prob < significance_level / 2.0):
            self.fail(
                msg="prob=%f is not in [%f,%f]" %
                (prob, significance_level / 2.0, 1 - significance_level / 2.0))
        prob = chisqprob(TSN, df)
        if (prob < significance_level / 2.0):
            self.fail(
                msg="prob=%f is not in [%f,%f]" %
                (prob, significance_level / 2.0, 1 - significance_level / 2.0))
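# The tests above use chisqprob(x, df) as the upper-tail probability of a
# Chi^2 distribution. A standalone sketch of the same acceptance-band check,
# assuming SciPy is available (scipy.stats.chi2.sf computes the same survival
# function as pstat's chisqprob):
from scipy.stats import chi2

def chi2_band_ok(stat, df, significance_level=0.05):
    # accept when the p-value lies inside [alpha/2, 1 - alpha/2]
    prob = chi2.sf(stat, df)
    return significance_level / 2.0 <= prob <= 1 - significance_level / 2.0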
 def chi_square_test_with_known_mean(self,
                                     function,
                                     mean,
                                     variance,
                                     number_of_iterations,
                                     significance_level=0.01,
                                     number_of_tries=5):
     """For each test, run a two-sided Chi^2 test for sigma = sigma_0 vs. sigma != sigma_0, if means are known.
     'mean' and 'variance' are arrays whose length must correspond to the array that the given function produces.
     Since the stochastic test will fail every once in a while, run the whole
     test up to number_of_tries times, until either it succeeds or it fails too many times.
     """
     for j in range(number_of_tries):
         K = mean.size
         x = zeros((number_of_iterations, K), dtype=float32)
         for i in range(number_of_iterations):
             x[i, :] = function()
         stat = (((x - mean)**2.0) / variance).sum()
         prob = chisqprob(stat, K * number_of_iterations)
         logger.log_status(
             "Stochastic Test: Chi^2 test statistic = " + str(stat) +
             ", df=", str(K * number_of_iterations), ", p=" + str(prob))
         if (prob >= significance_level / 2.0) and (
                 prob <= (1 - significance_level / 2.0)):
             # test succeeded -- jump out of the method
             return
     # test failed more than number_of_tries times
     self.fail(
         msg="prob=%f is not in [%f,%f]" %
         (prob, significance_level / 2.0, 1 - significance_level / 2.0))
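# A hypothetical call pattern for the helper above (sampler and numbers are
# invented for illustration): testing a normal sampler against its known
# mean and variance.
#
#     from numpy import array
#     from numpy.random import normal
#
#     def draw():
#         return normal(loc=array([0.0, 2.0]), scale=array([1.0, 0.5]))
#
#     self.chi_square_test_with_known_mean(draw,
#                                          mean=array([0.0, 2.0]),
#                                          variance=array([1.0, 0.25]),
#                                          number_of_iterations=100)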
Example #4
    def test_sample_uniform_coefficients(self):
        """Coefficients are sampled from U(x-0.5, x+0.5), where x is the coefficient value."""

        from opus_core.third_party.pstat import chisqprob
        coef_values = array([0.5, -0.00001], dtype="float32")
        coef = Coefficients(names=array(["coef1", "coef2"]),
                            values=coef_values)

        # for each coefficient run a one-sided Chi^2 test
        expected_values = coef_values
        TS = zeros(coef_values.size)
        df = 9
        significance_level = 0.05
        for j in range(df + 1):
            new_coef = coef.sample_values(distribution='uniform')
            values = new_coef.get_values()
            TS += ((values - expected_values)**2) / expected_values

        for i in range(values.size):
            prob = chisqprob(TS[i], df)
            if (prob < significance_level / 2.0):
                self.fail(msg="prob=%f is not in [%f,%f]" %
                          (prob, significance_level / 2.0,
                           1 - significance_level / 2.0))

        # check data type
        self.assert_(values.dtype.name == "float32",
                     msg="Error in coefficients data type.")
Example #5
 def _run_stochastic_test_poisson(
     self, function, expected_results, number_of_iterations, significance_level=0.01, transformation=None
 ):
     """
     Run the given function for the specified number_of_iterations.
     Uses a Poisson likelihood-ratio test to determine whether the produced
     results are consistent with the expected_results at the given significance_level.
     """
     K = expected_results.size
     sum_y = zeros(K, dtype=float32)
     x_kr = zeros((number_of_iterations, K), dtype=float32)
     for i in range(number_of_iterations):
         y_r = function()
         x_kr[i, :] = try_transformation(y_r, transformation)
         sum_y = sum_y + x_kr[i, :]
     lambdak = sum_y / float(number_of_iterations)
     lambdanull = try_transformation(expected_results.astype(float32), transformation)
     #        print lambdak
     #        print lambdanull
     sumxk = sum(x_kr, axis=0)
     LRTS = 2.0 * (
         (number_of_iterations * (lambdanull - lambdak).sum())
         + (ln(lambdak / ma.masked_where(lambdanull == 0, lambdanull)) * sumxk).sum()
     )
     prob = chisqprob(LRTS, K)
     # print LRTS, prob
     logger.log_status("Stochastic Test Poisson: LRTS=" + str(LRTS) + ", df=", str(K), ", p=" + str(prob))
     return (prob >= significance_level, "prob=%f < significance level of %f" % (prob, significance_level))
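# Standalone sketch of the likelihood-ratio statistic computed above, assuming
# NumPy (zero entries of lambdanull would need masking, as in the original):
import numpy as np

def poisson_lrts(x_kr, lambdanull):
    # x_kr: iterations x components matrix of (transformed) results
    R = x_kr.shape[0]
    lambdak = x_kr.mean(axis=0)   # per-component ML estimate
    sumxk = x_kr.sum(axis=0)
    return 2.0 * (R * (lambdanull - lambdak).sum()
                  + (np.log(lambdak / lambdanull) * sumxk).sum())
# p-value: chisqprob(poisson_lrts(x_kr, lambdanull), K), with K = x_kr.shape[1]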
Example #6
    def test_sample_uniform_0_1_coefficients(self):
        """ All coefficients are sampled from U(5,10). """

        from opus_core.third_party.pstat import chisqprob
        coef_values = array([0, 0], dtype="float32")
        coef = Coefficients(names=array(["coef1", "coef2"]),
                            values=coef_values)

        # for each coefficient run a one-sided Chi^2 test
        expected_values = array([7.5, 7.5])
        TS = zeros(coef_values.size)
        df = 9
        significance_level = 0.05
        for j in range(df + 1):
            new_coef = coef.sample_values(distribution='uniform',
                                          center_around_value=False,
                                          a=5,
                                          b=10)
            values = new_coef.get_values()
            TS += ((values - expected_values)**2) / expected_values

        for i in range(values.size):
            prob = chisqprob(TS[i], df)
            if (prob < significance_level / 2.0):
                self.fail(msg="prob=%f is not in [%f,%f]" %
                          (prob, significance_level / 2.0,
                           1 - significance_level / 2.0))
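# Quick sanity check of the expected value used above: the mean of U(5, 10) is
# (5 + 10) / 2 = 7.5. A Monte Carlo sketch, assuming NumPy:
from numpy.random import uniform
print(uniform(5, 10, size=100000).mean())   # ~7.5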
Example #7
 def compute_stochastic_test_normal(self,
                                    function,
                                    expected_results,
                                    number_of_iterations,
                                    significance_level=0.01,
                                    transformation="sqrt"):
     K = expected_results.size
     sum_y = zeros(K, dtype=float32)
     x_kr = zeros((number_of_iterations, K), dtype=float32)
     texpected_results = try_transformation(expected_results,
                                            transformation)
     for i in range(number_of_iterations):
         y_r = function()
         x_kr[i, :] = try_transformation(y_r, transformation)
         sum_y = sum_y + x_kr[i, :]
     muest = sum_y / float(number_of_iterations)
     sigma_1 = ((x_kr - muest)**2.0).sum() / float(number_of_iterations * K)
     self.variance = variance(
         x_kr,
         labels=reshape(array(number_of_iterations * range(1, K + 1)),
                        (number_of_iterations, K)),
         index=arange(K) + 1)
     sigma_0 = ((x_kr - texpected_results)**2.0).sum() / float(
         number_of_iterations * K)
     LRTS = number_of_iterations * K * log(sigma_0 / sigma_1)
     prob = chisqprob(LRTS, K)
     return (K, LRTS, prob)
 def _run_stochastic_test_pearson(self, function, expected_results,
                         number_of_iterations, significance_level=0.01, transformation=None):
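     """Pearson Chi^2 test of the results of `function` against
     expected_results (used as both mean and variance, as for Poisson
     counts). Returns a (passed, message) tuple."""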
     K = expected_results.size
     x_kr = zeros((number_of_iterations, K), dtype=float32)
     for i in range(number_of_iterations):
         x_kr[i,:] = function()
     mue = expected_results.astype(float32)
     pearson = 0.0
     for k in range(K):
         pearson = pearson + (((x_kr[:,k] - mue[k])**2.0)/mue[k]).sum()
     prob = chisqprob(pearson, K*number_of_iterations)
     #print pearson, prob
     logger.log_status("Stochastic Test: Pearson Chi^2=" + str(pearson) + ", df=",
                        str(K*number_of_iterations),", p=" + str(prob))
     return (prob >= significance_level, "prob=%f < significance level of %f" % (prob, significance_level))
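# The per-component loop in _run_stochastic_test_pearson is equivalent to a
# single NumPy broadcast expression; a quick standalone check:
import numpy as np
x_kr = np.arange(12, dtype=np.float32).reshape(4, 3)
mue = np.array([1.0, 2.0, 3.0], dtype=np.float32)
loop = sum((((x_kr[:, k] - mue[k])**2.0) / mue[k]).sum() for k in range(3))
vec = (((x_kr - mue)**2.0) / mue).sum()
assert np.allclose(loop, vec)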
 def chi_square_test_onesided(self, function, expected_values, number_of_iterations, significance_level=0.01, number_of_tries=5):
     """For each test, run a two-sided Chi^2 test"""
     for j in range(number_of_tries):
         K = expected_values.size
         x = zeros((number_of_iterations, K), dtype=float32)
         for i in range(number_of_iterations):
             x[i,:] = function()
         stat = (((x - expected_values)**2.0)/expected_values).sum()
         prob = chisqprob(stat, K*number_of_iterations)
         logger.log_status("Stochastic Test: Chi^2 test statistic = " + str(stat) + ", df=",
                            str(K*number_of_iterations),", p=" + str(prob))
         if (prob >= significance_level/2.0):
             # test succeeded -- jump out of the method
             return
     # test failed more than number_of_tries times
     self.fail(msg="prob=%f is not in [%f,%f]" % (prob, significance_level/2.0, 1-significance_level/2.0))
Example #17
    def estimate_dcm(self, data):
        nobs, alts, nvars, M = data.shape
        self.M = M
        depm = self.resources[
            "chosen_choice"]  # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        tags = ["estimate", "result"]
        vl = 2
        coef_names = self.resources.get("coefficient_names", None)
        nest_numbers = self.get_nest_numbers()

        index_of_fixed_values = zeros(nvars + M, dtype="bool8")
        fixed_coefs, fixed_values = self.resources.get("fixed_values",
                                                       (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values[get_indices_of_matched_items(
                coef_names, fixed_coefs)] = True
        index_of_not_fixed_values = logical_not(index_of_fixed_values)

        beta = zeros(nvars + M).astype(float32)
        beta[-M:] = self.range_mu[1]
        beta[index_of_fixed_values] = fixed_values.astype(beta.dtype)
        l_0 = self.nl_loglikelihood(beta, data, depm)

        ls_idx = arange(nvars, nvars + M)
        for name, sv in self.resources.get("starting_values", {}).iteritems():
            est = True
            if isinstance(sv, tuple) or isinstance(sv, list):
                est = sv[1]
                sv = sv[0]
            if name.startswith('__logsum_'):
                if nest_numbers is not None:
                    idx = ls_idx[where(nest_numbers == int(name[9:]))[0]]
                else:
                    idx = array([ls_idx[int(name[9:]) - 1]])
            else:
                idx = ematch(coef_names, name)
            beta[idx] = sv
            index_of_fixed_values[idx] = not (est)

        index_of_not_fixed_values = where(
            logical_not(index_of_fixed_values))[0]
        index_of_fixed_values = where(index_of_fixed_values)[0]

        bounds = index_of_not_fixed_values.size * [(None, None)]
        j = 0
        for i in range(nvars + M - 1, nvars - 1, -1):
            if i in index_of_not_fixed_values:
                bounds[index_of_not_fixed_values.size - j - 1] = self.range_mu
                j += 1

        logger.start_block('BFGS procedure')
        bfgs_result = fmin_bfgs(
            self.minus_nl_loglikelihood,
            beta[index_of_not_fixed_values],
            args=(data, depm, beta[index_of_fixed_values],
                  index_of_not_fixed_values, index_of_fixed_values),
            full_output=True,
            disp=True,
            epsilon=self.resources.get('bfgs_epsilon', self._epsilon),
        )

        logger.end_block()
        beta[index_of_not_fixed_values] = bfgs_result[0].astype(beta.dtype)
        se = zeros(nvars + M)
        tvalues = zeros(nvars + M)
        mingrad = bfgs_result[2]

        if not self.resources.get('bfgs_approximate_second_derivative',
                                  self._approximate_second_derivative):
            inv_hessian = bfgs_result[3]
            se[index_of_not_fixed_values] = sqrt(diagonal(inv_hessian))
        else:
            sec_der = approximate_second_derivative(
                self.minus_nl_loglikelihood,
                beta[index_of_not_fixed_values],
                args=(data, depm, beta[index_of_fixed_values],
                      index_of_not_fixed_values, index_of_fixed_values))
            inv_hessian = 1.0 / sec_der
            se[index_of_not_fixed_values] = sqrt(inv_hessian)

        tvalues[index_of_not_fixed_values] = beta[
            index_of_not_fixed_values] / se[index_of_not_fixed_values]

        l_1 = self.nl_loglikelihood(beta, data, depm)

        ll_ratio = 1 - (l_1 / l_0)
        adj_ll_ratio = 1 - ((l_1 - nvars - M) / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * index_of_not_fixed_values.size - 2 * l_1
        logger.log_status("Akaike's Information Criterion (AIC): ",
                          str(aic),
                          tags=tags,
                          verbosity=vl)
        bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
        logger.log_status("Bayesian Information Criterion (BIC): ",
                          str(bic),
                          tags=tags,
                          verbosity=vl)
        logger.log_status("***********************************************",
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Log-likelihood is:           ',
                          l_1,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Null Log-likelihood is:      ',
                          l_0,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Likelihood ratio index:      ',
                          ll_ratio,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Adj. likelihood ratio index: ',
                          adj_ll_ratio,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Number of observations:      ',
                          nobs,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Suggested |t-value| >        ', sqrt(log(nobs)))
        logger.log_status("-----------------------------------------------",
                          tags=tags,
                          verbosity_level=vl)
        if coef_names is not None:
            nestn = nest_numbers
            if nestn is None:
                nestn = range(1, M + 1)
            names = concatenate(
                (coef_names, array(map(lambda x: '__logsum_%s' % x, nestn))))
        else:
            names = [''] * (nvars + M)
        logger.log_status(
            "Coeff_names\testimate\tstd err\t\tt-values\tgradient",
            tags=tags,
            verbosity_level=vl)
        for i in range(index_of_not_fixed_values.size):
            logger.log_status(
                "%10s\t%8g\t%8g\t%8g\t%8g" %
                (names[index_of_not_fixed_values[i]],
                 beta[index_of_not_fixed_values[i]],
                 se[index_of_not_fixed_values[i]],
                 tvalues[index_of_not_fixed_values[i]], mingrad[i]),
                tags=tags,
                verbosity_level=vl)
        logger.log_status('***********************************************',
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Elapsed time: ',
                          time.clock() - self.start_time,
                          'seconds',
                          tags=tags,
                          verbosity_level=vl)
        df = nvars + M - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)
        return {
            "estimators": beta,
            "coefficient_names": names,
            "standard_errors": se,
            "other_measures": {
                "t_statistic": tvalues
            },
            "other_info": {
                "p-value": chisqprob(lrts, df),
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "df": df,
                "nobs": nobs
            }
        }
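# Quick standalone check of the fit measures reported above, with invented
# log-likelihood values (assuming SciPy for the Chi^2 survival function):
from math import log
from scipy.stats import chi2

l_0, l_1, k, nobs = -1200.0, -950.0, 12, 5000   # hypothetical values
aic = 2 * k - 2 * l_1                # Akaike's Information Criterion
bic = -2 * l_1 + k * log(nobs)       # Bayesian Information Criterion
lrts = -2 * (l_0 - l_1)              # likelihood-ratio test statistic
p_value = chi2.sf(lrts, k)           # same as chisqprob(lrts, df)
ll_ratio = 1 - (l_1 / l_0)           # likelihood ratio index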
Example #18
    def estimate_dcm(self, data):
        nobs, alts, nvars = data.shape
        # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        depm = self.resources["chosen_choice"] 
        coef_names = self.resources.get("coefficient_names", None)
        tags = ["estimate", "result"]
        vl = 2
        
        is_fixed_values = zeros(nvars, dtype="bool")
        fixed_coefs, fixed_values = self.resources.get("fixed_values", 
                                                      (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            is_fixed_values[in1d(coef_names, fixed_coefs)] = True
        is_unfixed_values = logical_not(is_fixed_values)

        if is_fixed_values.sum() > 0:
            logger.log_warning("fixed coefficients have not been tested with BFGS estimation procedure; ")
            logger.log_warning("use with caution!")

        beta=zeros(nvars, dtype=self.dtype)
        beta[is_fixed_values] = fixed_values.astype(beta.dtype)
        ll_0=self.loglikelihood(beta, data, depm)

        bounds_lower = bounds_upper = repeat([None], beta.size)
        bounds_lower[is_fixed_values] = bounds_upper[is_fixed_values] = fixed_values
        bounds = zip(bounds_lower, bounds_upper)
                
        logger.start_block('Starting L_BFGS_B procedure...')
        epsilon = self.resources.get('bfgs_epsilon', self.epsilon)
        fprime = self.get_gradient if not self.approx_grad else None
        bfgs_result = fmin_l_bfgs_b(self.minus_loglikelihood, beta, 
                                    args=(data, depm), 
                                    fprime=fprime,
                                    approx_grad=self.approx_grad, 
                                    bounds=bounds, 
                                    iprint=self.iprint, 
                                    epsilon=epsilon,
                                    maxfun=self.maxiter
                                    )

        beta = bfgs_result[0].astype(beta.dtype)
        func_at_min = bfgs_result[1]
        info = bfgs_result[2]
        status = {0:'Convergence achieved.', 
                  1:'Maximum iterations reached without convergence.',
                  2:'Stop for another reason: %s.' % info['task'] if info.has_key('task') \
                                                                  else 'unknown'
                 }
        warnflag = ''
        if info['warnflag'] != 0:
            warnflag = status[info['warnflag']]

        grad_at_min = info['grad']
        g = self.get_gradient_by_agent(beta, data, depm)
        try:
            h=self.get_hessian(g)
        except:
            msg = "Estimation led to singular matrix. No results."
            warnflag += msg + "\n"
            logger.log_warning(msg, tags=tags, verbosity_level=vl)
            return {}

        g=g.sum(axis=0)
        c=dot(dot(transpose(g),h),g)

        se=(self.get_standard_error(h)).astype(self.dtype)
        se[is_fixed_values] = 0.0
        tvalues=zeros(nvars, dtype=self.dtype)

 
        tvalues[is_unfixed_values] = beta[is_unfixed_values]/se[is_unfixed_values]
        
        ll_1=self.loglikelihood(beta, data, depm)
        ll_ratio = 1-(ll_1/ll_0)
        adj_ll_ratio = 1-((ll_1-nvars)/ll_0)
        
        # http://en.wikipedia.org/wiki/Akaike_information_criterion 
        aic = 2 * is_unfixed_values.size - 2 * ll_1
        bic = -2 * ll_1 + is_unfixed_values.size * log(nobs)
        df=nvars-is_fixed_values.sum()
        lrts = -2*(ll_0-ll_1)
        iters = info['funcalls']

        results = {"coefficient_names":coef_names,
                  "estimators":beta, 
                  "standard_errors":se, 
                  "other_measures":{"t_statistic": tvalues},
                  "other_info":{"aic": aic,
                                "bic": bic,
                                "p-value":chisqprob(lrts, df),
                                "l_0": ll_0,
                                "l_1": ll_1,
                                "ll_ratio_index":ll_ratio,
                                "ll_ratio_test_statistics":lrts,
                                "convergence": c,
                                "df": df,  
                                "nobs":nobs,
                                "nvars": nvars,
                                "nalts": alts,
                                "iterations": iters
                                },
                   "warnflag": warnflag
                  }
        self.print_results(results)
        logger.end_block()
        #logger.log_status('Elapsed time: ', time.clock()-self.start_time, 'seconds',
        #                  tags=tags, verbosity_level=vl)
        return results
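# Minimal standalone sketch of the fmin_l_bfgs_b call pattern used above, with
# a hypothetical objective (SciPy's functional L-BFGS-B interface):
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def f(x):
    return ((x - 3.0)**2).sum()

x_opt, f_min, info = fmin_l_bfgs_b(f, np.zeros(2), approx_grad=True)
# info['warnflag'] == 0 signals convergence; info['funcalls'] is the number of
# function evaluations that the estimator above reports as "iterations".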
    def estimate_dcm(self, data):
        maxiter=self.maximum_iterations #Maximum iterations allowed
        eps=0.001 #Convergence criterion for gradient*hessian-inv*gradient
        tags = ["estimate", "result"]
        vl = 2
        nobs, alts, nvars = data.shape
        depm = self.resources["chosen_choice"] # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        coef_names = self.resources.get("coefficient_names", None)
        fixed_coefs, fixed_values = self.resources.get("fixed_values", (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values = get_indices_of_matched_items(coef_names, fixed_coefs)
            index_of_not_fixed_values = ones(nvars, dtype="bool8")
            index_of_not_fixed_values[index_of_fixed_values] = False
            index_of_not_fixed_values = where(index_of_not_fixed_values)[0]
        else:
            index_of_fixed_values = array([], dtype="int32")
            index_of_not_fixed_values = arange(nvars)
#        pdb.set_trace()
        b2=zeros(nvars).astype(float32)
        b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)        
        se = zeros(nvars).astype(float32)
        tvalues = zeros(nvars).astype(float32)
        l_2=self.mnl_loglikelihood(data, b2, depm)
        l_0 = l_2
        s=1

        for it in range(maxiter):
            b1=b2
            l_1=l_2
            g=(self.mnl_gradient(data, b1, depm, index_of_not_fixed_values))
            try:
                h=self.get_hessian(g)
            except:
                logger.log_warning("Estimation led to singular matrix. No results.", tags=tags, verbosity_level=vl)
                return {}
            g=g.sum(axis=0)
            c=dot(dot(transpose(g),h),g)
            if c <= eps:
                logger.log_status('Convergence achieved.', tags=tags, verbosity_level=vl)
                break
            d=dot(h,g)
            b2[index_of_not_fixed_values]=(b1[index_of_not_fixed_values]+s*d).astype(b2.dtype)
            l_2=self.mnl_loglikelihood(data,b2, depm)
            if l_2 <= l_1:
                s=s/2.0
            if s <= .001:
                logger.log_warning('Cannot find increase', tags=tags, verbosity_level=vl)
                break
        # end of the iteration loop
        
        if it>=(maxiter-1):
            logger.log_warning('Maximum iterations reached without convergence', tags=tags, verbosity_level=vl)
 
        se[index_of_not_fixed_values]=self.get_standard_error(h).astype(se.dtype)
        tvalues[index_of_not_fixed_values] = (b1[index_of_not_fixed_values]/se[index_of_not_fixed_values]).astype(tvalues.dtype)
        ll_ratio = 1-(l_1/l_0)
        adj_ll_ratio = 1-((l_1-nvars)/l_0)
        
        # http://en.wikipedia.org/wiki/Akaike_information_criterion 
        aic = 2 * index_of_not_fixed_values.size - 2 * l_1
        logger.log_status("Akaike's Information Criterion (AIC): ", str(aic), tags=tags, verbosity=vl)
        bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
        logger.log_status("Bayesian Information Criterion (BIC): ", str(bic), tags=tags, verbosity=vl)
        
        logger.log_status("Number of Iterations: ", it+1, tags=tags, verbosity_level=vl)
        logger.log_status("***********************************************", tags=tags, verbosity_level=vl)
        logger.log_status('Log-likelihood is:           ', l_1, tags=tags, verbosity_level=vl)
        logger.log_status('Null Log-likelihood is:      ', l_0, tags=tags, verbosity_level=vl)
        logger.log_status('Likelihood ratio index:      ', ll_ratio, tags=tags, verbosity_level=vl)
        logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio, tags=tags, verbosity_level=vl)
        logger.log_status('Number of observations:      ', nobs, tags=tags, verbosity_level=vl)
        logger.log_status('Suggested |t-value| >        ', sqrt(log(nobs)))
        logger.log_status('Convergence statistic is:    ', c, tags=tags, verbosity_level=vl)
        logger.log_status("-----------------------------------------------", tags=tags, verbosity_level=vl)
        if coef_names is not None:
            names = coef_names
        else:
            names = ['']*index_of_not_fixed_values.size
        logger.log_status("Coeff_names\testimate\tstd err\t\tt-values", tags=tags, verbosity_level=vl)
        for i in index_of_not_fixed_values:
            logger.log_status("%10s\t%8g\t%8g\t%8g" % (names[i],b1[i],se[i],tvalues[i]), tags=tags, verbosity_level=vl)
        logger.log_status('***********************************************', tags=tags, verbosity_level=vl)
        logger.log_status('Elapsed time: ',time.clock()-self.start_time, 'seconds', tags=tags, verbosity_level=vl)
        est = b1
        df=nvars-index_of_fixed_values.size
        lrts = -2*(l_0-l_1)
        return {"estimators":est, "standard_errors":se, "other_measures":{"t_statistic": tvalues},
                 "other_info":{"p-value":chisqprob(lrts, df),
                    "ll_ratio_index":ll_ratio,
                    "ll_ratio_test_statistics":lrts, "df": df,  "nobs":nobs}}
    def estimate_dcm(self, data):
        maxiter=self.maximum_iterations #Maximum iterations allowed
        eps=0.001 #Convergence criterion for gradient*hessian-inv*gradient
        tags = ["estimate", "result"]
        vl = 2
        nobs, alts, nvars = data.shape
        depm = self.resources["chosen_choice"] # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        coef_names = self.resources.get("coefficient_names", None)
        fixed_coefs, fixed_values = self.resources.get("fixed_values", (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values = get_indices_of_matched_items(coef_names, fixed_coefs)
            index_of_not_fixed_values = ones(nvars, dtype="bool8")
            index_of_not_fixed_values[index_of_fixed_values] = False
            index_of_not_fixed_values = where(index_of_not_fixed_values)[0]
        else:
            index_of_fixed_values = array([], dtype="int32")
            index_of_not_fixed_values = arange(nvars)
#        pdb.set_trace()
        b2=zeros(nvars).astype(float32)
        b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)        
        se = zeros(nvars).astype(float32)
        tvalues = zeros(nvars).astype(float32)
        l_2=self.mnl_loglikelihood(data, b2, depm)
        l_0 = l_2
        s=1
        warnflag = ''

        for it in range(maxiter):
            b1=b2
            l_1=l_2
            g=(self.mnl_gradient(data, b1, depm, index_of_not_fixed_values))
            try:
                h=self.get_hessian(g)
            except:
                msg = "Estimation led to singular matrix. No results."
                warnflag += msg + "\n"
                logger.log_warning(msg, tags=tags, verbosity_level=vl)
                return {}
            g=g.sum(axis=0)
            c=dot(dot(transpose(g),h),g)
            if c <= eps:
                msg = "Convergence achieved."
                logger.log_status(msg, tags=tags, verbosity_level=vl)
                break
            d=dot(h,g)
            b2[index_of_not_fixed_values]=(b1[index_of_not_fixed_values]+s*d).astype(b2.dtype)
            l_2=self.mnl_loglikelihood(data,b2, depm)
            if l_2 <= l_1:
                s=s/2.0
            if s <= .001:
                msg = "Cannot find increase."
                warnflag += msg + "\n"
                #logger.log_warning(msg, tags=tags, verbosity_level=vl)
                break
        # end of the iteration loop
        
        if it>=(maxiter-1):
            msg = "Maximum iterations reached without convergence."
            warnflag += msg + "\n"
            #logger.log_warning(msg, tags=tags, verbosity_level=vl)
 
        se[index_of_not_fixed_values]=self.get_standard_error(h).astype(se.dtype)
        tvalues[index_of_not_fixed_values] = (b1[index_of_not_fixed_values]/se[index_of_not_fixed_values]).astype(tvalues.dtype)
        ll_ratio = 1-(l_1/l_0)
        adj_ll_ratio = 1-((l_1-nvars)/l_0)
        
        # http://en.wikipedia.org/wiki/Akaike_information_criterion 
        aic = 2 * index_of_not_fixed_values.size - 2 * l_1
        bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)

        if coef_names is not None:
            names = coef_names
        else:
            names = ['']*index_of_not_fixed_values.size

        est = b1
        df=nvars-index_of_fixed_values.size
        lrts = -2*(l_0-l_1)

        result = {"coefficient_names":names,
                  "estimators":est, 
                  "standard_errors":se, 
                  "other_measures":{"t_statistic": tvalues},
                  "other_info":{"aic": aic,
                                "bic": bic,
                                "p-value":chisqprob(lrts, df),
                                "l_0": l_0,
                                "l_1": l_1,
                                "ll_ratio_index":ll_ratio,
                                "ll_ratio_test_statistics":lrts,
                                "convergence": c,
                                "df": df,  
                                "nobs":nobs,
                                "nvars": nvars,
                                "nalts": alts,
                                "iterations": it+1
                                },
                  "warnflag": warnflag}
        self.print_results(result)
        
        return result
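# Generic sketch of the damped Newton ascent used by the two estimators above
# (hypothetical callables; `inv_hess` stands in for get_hessian, which the
# code applies as an inverse-Hessian approximation: the step is d = h.g and
# the convergence statistic is c = g'.h.g):
import numpy as np

def damped_newton_ascent(loglik, grad, inv_hess, b0, maxiter=100, eps=0.001):
    b, s = b0.copy(), 1.0
    for it in range(maxiter):
        g = grad(b)
        h = inv_hess(b)
        c = g.dot(h).dot(g)      # convergence statistic
        if c <= eps:
            break                # convergence achieved
        b_new = b + s * h.dot(g)
        if loglik(b_new) <= loglik(b):
            s /= 2.0             # halve the step when no improvement
            if s <= 0.001:
                break            # cannot find an increase
            continue
        b = b_new
    return b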
Beispiel #21
0
    def estimate_dcm(self, data):
        maxiter = self.maximum_iterations  #Maximum iterations allowed
        eps = 0.001  #Convergence criterion for gradient*hessian-inv*gradient
        tags = ["estimate", "result"]
        vl = 2
        nobs, alts, nvars = data.shape
        depm = self.resources[
            "chosen_choice"]  # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        coef_names = self.resources.get("coefficient_names", None)
        fixed_coefs, fixed_values = self.resources.get("fixed_values",
                                                       (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values = get_indices_of_matched_items(
                coef_names, fixed_coefs)
            index_of_not_fixed_values = ones(nvars, dtype="bool8")
            index_of_not_fixed_values[index_of_fixed_values] = False
            index_of_not_fixed_values = where(index_of_not_fixed_values)[0]
        else:
            index_of_fixed_values = array([], dtype="int32")
            index_of_not_fixed_values = arange(nvars)
#        pdb.set_trace()
        b2 = zeros(nvars).astype(float32)
        b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
        se = zeros(nvars).astype(float32)
        tvalues = zeros(nvars).astype(float32)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        l_0 = l_2
        s = 1

        for it in range(maxiter):
            b1 = b2
            l_1 = l_2
            g = (self.mnl_gradient(data, b1, depm, index_of_not_fixed_values))
            try:
                h = self.get_hessian(g)
            except:
                logger.log_warning(
                    "Estimation led to singular matrix. No results.",
                    tags=tags,
                    verbosity_level=vl)
                return {}
            g = g.sum(axis=0)
            c = dot(dot(transpose(g), h), g)
            if c <= eps:
                logger.log_status('Convergence achieved.',
                                  tags=tags,
                                  verbosity_level=vl)
                break
            d = dot(h, g)
            b2[index_of_not_fixed_values] = (b1[index_of_not_fixed_values] +
                                             s * d).astype(b2.dtype)
            l_2 = self.mnl_loglikelihood(data, b2, depm)
            if l_2 <= l_1:
                s = s / 2.0
            if s <= .001:
                logger.log_warning('Cannot find increase',
                                   tags=tags,
                                   verbosity_level=vl)
                break
        # end of the iteration loop

        if it >= (maxiter - 1):
            logger.log_warning(
                'Maximum iterations reached without convergence',
                tags=tags,
                verbosity_level=vl)

        se[index_of_not_fixed_values] = self.get_standard_error(h).astype(
            se.dtype)
        tvalues[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] /
            se[index_of_not_fixed_values]).astype(tvalues.dtype)
        ll_ratio = 1 - (l_1 / l_0)
        adj_ll_ratio = 1 - ((l_1 - nvars) / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * index_of_not_fixed_values.size - 2 * l_1
        logger.log_status("Akaike's Information Criterion (AIC): ",
                          str(aic),
                          tags=tags,
                          verbosity=vl)
        bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
        logger.log_status("Bayesian Information Criterion (BIC): ",
                          str(bic),
                          tags=tags,
                          verbosity=vl)

        logger.log_status("Number of Iterations: ",
                          it + 1,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status("***********************************************",
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Log-likelihood is:           ',
                          l_1,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Null Log-likelihood is:      ',
                          l_0,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Likelihood ratio index:      ',
                          ll_ratio,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Adj. likelihood ratio index: ',
                          adj_ll_ratio,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Number of observations:      ',
                          nobs,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Suggested |t-value| >        ', sqrt(log(nobs)))
        logger.log_status('Convergence statistic is:    ',
                          c,
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status("-----------------------------------------------",
                          tags=tags,
                          verbosity_level=vl)
        if coef_names is not None:
            names = coef_names
        else:
            names = [''] * index_of_not_fixed_values.size
        logger.log_status("Coeff_names\testimate\tstd err\t\tt-values",
                          tags=tags,
                          verbosity_level=vl)
        for i in index_of_not_fixed_values:
            logger.log_status("%10s\t%8g\t%8g\t%8g" %
                              (names[i], b1[i], se[i], tvalues[i]),
                              tags=tags,
                              verbosity_level=vl)
        logger.log_status('***********************************************',
                          tags=tags,
                          verbosity_level=vl)
        logger.log_status('Elapsed time: ',
                          time.clock() - self.start_time,
                          'seconds',
                          tags=tags,
                          verbosity_level=vl)
        est = b1
        df = nvars - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)
        return {
            "estimators": est,
            "standard_errors": se,
            "other_measures": {
                "t_statistic": tvalues
            },
            "other_info": {
                "p-value": chisqprob(lrts, df),
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "df": df,
                "nobs": nobs
            }
        }
    def estimate_dcm(self, data):
        maxiter = self.maximum_iterations  #Maximum iterations allowed
        eps = 0.001  #Convergence criterion for gradient*hessian-inv*gradient
        tags = ["estimate", "result"]
        vl = 2
        nobs, alts, nvars = data.shape
        depm = self.resources[
            "chosen_choice"]  # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        coef_names = self.resources.get("coefficient_names", None)
        fixed_coefs, fixed_values = self.resources.get("fixed_values",
                                                       (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values = get_indices_of_matched_items(
                coef_names, fixed_coefs)
            index_of_not_fixed_values = ones(nvars, dtype="bool8")
            index_of_not_fixed_values[index_of_fixed_values] = False
            index_of_not_fixed_values = where(index_of_not_fixed_values)[0]
        else:
            index_of_fixed_values = array([], dtype="int32")
            index_of_not_fixed_values = arange(nvars)
#        pdb.set_trace()
        b2 = zeros(nvars).astype(float32)
        b2[index_of_fixed_values] = fixed_values.astype(b2.dtype)
        se = zeros(nvars).astype(float32)
        tvalues = zeros(nvars).astype(float32)
        l_2 = self.mnl_loglikelihood(data, b2, depm)
        l_0 = l_2
        s = 1
        warnflag = ''

        for it in range(maxiter):
            b1 = b2
            l_1 = l_2
            g = (self.mnl_gradient(data, b1, depm, index_of_not_fixed_values))
            try:
                h = self.get_hessian(g)
            except:
                msg = "Estimation led to singular matrix. No results."
                warnflag += msg + "\n"
                logger.log_warning(msg, tags=tags, verbosity_level=vl)
                return {}
            g = g.sum(axis=0)
            c = dot(dot(transpose(g), h), g)
            if c <= eps:
                msg = "Convergence achieved."
                logger.log_status(msg, tags=tags, verbosity_level=vl)
                break
            d = dot(h, g)
            b2[index_of_not_fixed_values] = (b1[index_of_not_fixed_values] +
                                             s * d).astype(b2.dtype)
            l_2 = self.mnl_loglikelihood(data, b2, depm)
            if l_2 <= l_1:
                s = s / 2.0
            if s <= .001:
                msg = "Cannot find increase."
                warnflag += msg + "\n"
                #logger.log_warning(msg, tags=tags, verbosity_level=vl)
                break
        # end of the iteration loop

        if it >= (maxiter - 1):
            msg = "Maximum iterations reached without convergence."
            warnflag += msg + "\n"
            #logger.log_warning(msg, tags=tags, verbosity_level=vl)

        se[index_of_not_fixed_values] = self.get_standard_error(h).astype(
            se.dtype)
        tvalues[index_of_not_fixed_values] = (
            b1[index_of_not_fixed_values] /
            se[index_of_not_fixed_values]).astype(tvalues.dtype)
        ll_ratio = 1 - (l_1 / l_0)
        adj_ll_ratio = 1 - ((l_1 - nvars) / l_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        aic = 2 * index_of_not_fixed_values.size - 2 * l_1
        bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)

        if coef_names is not None:
            names = coef_names
        else:
            names = [''] * index_of_not_fixed_values.size

        est = b1
        df = nvars - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)

        result = {
            "coefficient_names": names,
            "estimators": est,
            "standard_errors": se,
            "other_measures": {
                "t_statistic": tvalues
            },
            "other_info": {
                "aic": aic,
                "bic": bic,
                "p-value": chisqprob(lrts, df),
                "l_0": l_0,
                "l_1": l_1,
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "convergence": c,
                "df": df,
                "nobs": nobs,
                "nvars": nvars,
                "nalts": alts,
                "iterations": it + 1
            },
            "warnflag": warnflag
        }
        self.print_results(result)

        return result
    def estimate_dcm(self, data):
        nobs, alts, nvars = data.shape
        # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        depm = self.resources["chosen_choice"]
        coef_names = self.resources.get("coefficient_names", None)
        tags = ["estimate", "result"]
        vl = 2

        is_fixed_values = zeros(nvars, dtype="bool")
        fixed_coefs, fixed_values = self.resources.get("fixed_values",
                                                       (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            is_fixed_values[in1d(coef_names, fixed_coefs)] = True
        is_unfixed_values = logical_not(is_fixed_values)

        if is_fixed_values.sum() > 0:
            logger.log_warning(
                "fixed coefficients have not been tested with BFGS estimation procedure; "
            )
            logger.log_warning("use with caution!")

        beta = zeros(nvars, dtype=self.dtype)
        beta[is_fixed_values] = fixed_values.astype(beta.dtype)
        ll_0 = self.loglikelihood(beta, data, depm)

        bounds_lower = bounds_upper = repeat([None], beta.size)
        bounds_lower[is_fixed_values] = bounds_upper[
            is_fixed_values] = fixed_values
        bounds = zip(bounds_lower, bounds_upper)

        logger.start_block('Starting L_BFGS_B procedure...')
        epsilon = self.resources.get('bfgs_epsilon', self.epsilon)
        fprime = self.get_gradient if not self.approx_grad else None
        bfgs_result = fmin_l_bfgs_b(self.minus_loglikelihood,
                                    beta,
                                    args=(data, depm),
                                    fprime=self.get_gradient,
                                    approx_grad=self.approx_grad,
                                    bounds=bounds,
                                    iprint=self.iprint,
                                    epsilon=epsilon,
                                    maxfun=self.maxiter)

        beta = bfgs_result[0].astype(beta.dtype)
        func_at_min = bfgs_result[1]
        info = bfgs_result[2]
        status = {0:'Convergence achieved.',
                  1:'Maximum iterations reached without convergence.',
                  2:'Stop for another reason: %s.' % info['task'] if info.has_key('task') \
                                                                  else 'unknown'
                 }
        warnflag = ''
        if info['warnflag'] != 0:
            warnflag = status[info['warnflag']]

        grad_at_min = info['grad']
        g = self.get_gradient_by_agent(beta, data, depm)
        try:
            h = self.get_hessian(g)
        except:
            msg = "Estimation led to singular matrix. No results."
            warnflag += msg + "\n"
            logger.log_warning(msg, tags=tags, verbosity_level=vl)
            return {}

        g = g.sum(axis=0)
        c = dot(dot(transpose(g), h), g)

        se = (self.get_standard_error(h)).astype(self.dtype)
        se[is_fixed_values] = 0.0
        tvalues = zeros(nvars, dtype=self.dtype)

        tvalues[is_unfixed_values] = beta[is_unfixed_values] / se[
            is_unfixed_values]

        ll_1 = self.loglikelihood(beta, data, depm)
        ll_ratio = 1 - (ll_1 / ll_0)
        adj_ll_ratio = 1 - ((ll_1 - nvars) / ll_0)

        # http://en.wikipedia.org/wiki/Akaike_information_criterion
        # Penalize by the number of freely estimated parameters
        # (is_unfixed_values.size would also count the fixed coefficients).
        df = nvars - is_fixed_values.sum()
        aic = 2 * df - 2 * ll_1
        bic = -2 * ll_1 + df * log(nobs)
        # Likelihood ratio test statistic, asymptotically chi^2(df) under the null.
        lrts = -2 * (ll_0 - ll_1)
        iters = info['funcalls']  # number of function evaluations

        results = {
            "coefficient_names": coef_names,
            "estimators": beta,
            "standard_errors": se,
            "other_measures": {
                "t_statistic": tvalues
            },
            "other_info": {
                "aic": aic,
                "bic": bic,
                "p-value": chisqprob(lrts, df),
                "l_0": ll_0,
                "l_1": ll_1,
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "convergence": c,
                "df": df,
                "nobs": nobs,
                "nvars": nvars,
                "nalts": alts,
                "iterations": iters
            },
            "warnflag": warnflag
        }
        self.print_results(results)
        logger.end_block()
        return results

    def estimate_dcm(self, data):
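        """Estimate a nested logit model with the L-BFGS-B procedure.

        ``data`` is an array of shape (nobs, alts, nvars, M), where M is the
        number of nests; the last M entries of beta hold the logsum (mu)
        parameters.
        """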
        nobs, alts, nvars, M = data.shape
        self.M = M
        depm = self.resources["chosen_choice"] # matrix (nobs x alts) of 0's and 1's. 1 is on positions of chosen location.
        tags = ["estimate", "result"]
        vl = 2
        coef_names = self.resources.get("coefficient_names", None)
        nest_numbers = self.get_nest_numbers()
        
        index_of_fixed_values = zeros(nvars + M, dtype="bool")
        fixed_coefs, fixed_values = self.resources.get("fixed_values", (array([]), array([])))
        if (coef_names is not None) and (fixed_coefs.size > 0):
            index_of_fixed_values[get_indices_of_matched_items(coef_names, fixed_coefs)] = True
        index_of_not_fixed_values = logical_not(index_of_fixed_values)
        
        # Start coefficients at 1 and the logsum (mu) parameters at the upper
        # end of their allowed range; pin any fixed values.
        beta = ones(nvars + M).astype(float32)
        beta[-M:] = self.range_mu[1]
        beta[index_of_fixed_values] = fixed_values.astype(beta.dtype)
        # Null model: all coefficients 0 and all logsum parameters mu = 1.
        l_0beta = zeros(nvars + M).astype(float32)
        l_0beta[-M:] = 1
        l_0 = self.nl_loglikelihood(l_0beta, data, depm)

        ls_idx = arange(nvars, nvars+M)
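        # Optional user starting values: a scalar, or a (value, estimate_flag)
        # pair where a False flag fixes the coefficient. Names of the form
        # '__logsum_N' address the mu parameter of nest N; anything else is
        # matched against coef_names.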
        for name, sv in self.resources.get("starting_values", {}).iteritems():
            est = True
            if isinstance(sv, (tuple, list)):
                est = sv[1]
                sv = sv[0]
            if name.startswith('__logsum_'):
                nest = int(name[len('__logsum_'):])
                if nest_numbers is not None:
                    idx = ls_idx[where(nest_numbers == nest)[0]]
                else:
                    idx = array([ls_idx[nest - 1]])
            else:
                idx = ematch(coef_names, name)
            beta[idx] = sv
            index_of_fixed_values[idx] = not est

        index_of_not_fixed_values = where(logical_not(index_of_fixed_values))[0] 
        index_of_fixed_values = where(index_of_fixed_values)[0]
        
        # Box constraints: (-5, 5) for utility coefficients; the trailing
        # logsum parameters get self.range_mu instead. Because
        # index_of_not_fixed_values is sorted, the free mu parameters occupy
        # the tail of the bounds list.
        bounds = index_of_not_fixed_values.size * [(-5.0, 5.0)]
        j = 0
        for i in range(nvars + M - 1, nvars - 1, -1):
            if i in index_of_not_fixed_values:
                bounds[index_of_not_fixed_values.size - j - 1] = self.range_mu
                j += 1
                
        logger.start_block('BFGS procedure')
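        # No analytic gradient is supplied here; fmin_l_bfgs_b approximates it
        # numerically (approx_grad=True).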
        bfgs_result = fmin_l_bfgs_b(self.minus_nl_loglikelihood,
                                    beta[index_of_not_fixed_values],
                                    args=(data, depm,
                                          beta[index_of_fixed_values],
                                          index_of_not_fixed_values,
                                          index_of_fixed_values),
                                    bounds=bounds,
                                    approx_grad=True,
                                    pgtol=.01,
                                    disp=True,
                                    epsilon=self.resources.get(
                                        'bfgs_epsilon', self._epsilon))

        logger.end_block()
        beta[index_of_not_fixed_values] = bfgs_result[0].astype(beta.dtype)
        se = zeros(nvars+M)
        tvalues = zeros(nvars+M)
        mingrad = bfgs_result[2]['grad']

        # The inverse Hessian is no longer provided by fmin_l_bfgs_b, so the
        # second derivative is approximated numerically and only its diagonal
        # is used for the standard errors.
        sec_der = approximate_second_derivative(
            self.minus_nl_loglikelihood, beta[index_of_not_fixed_values],
            args=(data, depm, beta[index_of_fixed_values],
                  index_of_not_fixed_values, index_of_fixed_values))
        inv_hessian = 1.0 / sec_der
        se[index_of_not_fixed_values] = sqrt(inv_hessian)
        
        tvalues[index_of_not_fixed_values] = (beta[index_of_not_fixed_values] /
                                              se[index_of_not_fixed_values])

        l_1 = self.nl_loglikelihood(beta, data, depm)

        ll_ratio = 1 - (l_1 / l_0)
        adj_ll_ratio = 1 - ((l_1 - nvars - M) / l_0)
        
        # http://en.wikipedia.org/wiki/Akaike_information_criterion 
        aic = 2 * index_of_not_fixed_values.size - 2 * l_1
        logger.log_status("Akaike's Information Criterion (AIC): ", str(aic), tags=tags, verbosity_level=vl)
        bic = -2 * l_1 + index_of_not_fixed_values.size * log(nobs)
        logger.log_status("Bayesian Information Criterion (BIC): ", str(bic), tags=tags, verbosity_level=vl)
        logger.log_status("***********************************************", tags=tags, verbosity_level=vl)
        logger.log_status('Log-likelihood is:           ', l_1, tags=tags, verbosity_level=vl)
        logger.log_status('Null Log-likelihood is:      ', l_0, tags=tags, verbosity_level=vl)
        logger.log_status('Likelihood ratio index:      ', ll_ratio, tags=tags, verbosity_level=vl)
        logger.log_status('Adj. likelihood ratio index: ', adj_ll_ratio, tags=tags, verbosity_level=vl)
        logger.log_status('Number of observations:      ', nobs, tags=tags, verbosity_level=vl)
        logger.log_status('Suggested |t-value| >        ', sqrt(log(nobs)), tags=tags, verbosity_level=vl)
        logger.log_status('WARNING: Standard errors printed below are approximated', tags=tags, verbosity_level=vl)
        logger.log_status("-----------------------------------------------", tags=tags, verbosity_level=vl)
        if coef_names is not None:
            nestn = nest_numbers
            if nestn is None:
                nestn = range(1, M + 1)
            names = concatenate((coef_names,
                                 array(['__logsum_%s' % x for x in nestn])))
        else:
            names = ['']*(nvars+M)
        logger.log_status("Coeff_names\testimate\tstd err\t\tt-values\tgradient", tags=tags, verbosity_level=vl)
        for i in range(index_of_not_fixed_values.size):
            logger.log_status("%10s\t%8g\t%8g\t%8g\t%8g" % (names[index_of_not_fixed_values[i]],
                                                            beta[index_of_not_fixed_values[i]],
                                                            se[index_of_not_fixed_values[i]],
                                                            tvalues[index_of_not_fixed_values[i]], mingrad[i]), 
                              tags=tags, verbosity_level=vl)
        logger.log_status('***********************************************', tags=tags, verbosity_level=vl)
        logger.log_status('Elapsed time: ',time.clock()-self.start_time, 'seconds', tags=tags, verbosity_level=vl)
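        # Degrees of freedom = number of freely estimated parameters; the LR
        # test statistic -2*(l_0 - l_1) is asymptotically chi^2(df).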
        df = nvars + M - index_of_fixed_values.size
        lrts = -2 * (l_0 - l_1)
        return {
            "estimators": beta,
            "coefficient_names": names,
            "standard_errors": se,
            "other_measures": {
                "t_statistic": tvalues
            },
            "other_info": {
                "p-value": chisqprob(lrts, df),
                "ll_ratio_index": ll_ratio,
                "ll_ratio_test_statistics": lrts,
                "df": df,
                "nobs": nobs
            }
        }