Ejemplo n.º 1
0
def test_ic():
    """Consistency checks for the information-criteria helpers.

    For each pair ``(ic, ic_sig)`` taken from ``aic``/``aicc``/``bic``/``hqic``
    and their ``*_sigma`` counterparts, verifies that:

    * both return a float64 result when given a 0-d integer array,
    * ``ic(-10/2 * log(2), 10, 2) / 10`` agrees with ``ic_sig(2, 10, 2)``,
    * ``ic_sig(log(2), 10, 2, islog=True)`` agrees with ``ic_sig(2, 10, 2)``.

    Finally checks the value each criterion returns when its first argument
    is zero against the explicit penalty formula.
    """
    # test information criteria
    # consistency check

    ics = [aic, aicc, bic, hqic]
    ics_sig = [aic_sigma, aicc_sigma, bic_sigma, hqic_sigma]

    for ic, ic_sig in zip(ics, ics_sig):
        # Compare against np.float64: the `np.float` alias (plain builtin
        # float) was removed in NumPy 1.24 and raises AttributeError there.
        assert_(ic(np.array(2), 10, 2).dtype == np.float64, msg=repr(ic))
        assert_(ic_sig(np.array(2), 10, 2).dtype == np.float64,
                msg=repr(ic_sig))

        assert_almost_equal(ic(-10./2.*np.log(2.), 10, 2)/10,
                            ic_sig(2, 10, 2),
                            decimal=14)

        # passing the log of the first argument with islog=True must give
        # the same value as passing the argument itself
        assert_almost_equal(ic_sig(np.log(2.), 10, 2, islog=True),
                            ic_sig(2, 10, 2),
                            decimal=14)

    # examples penalty directly from formula
    n, k = 10, 2
    assert_almost_equal(aic(0, n, k), 2*k, decimal=14)
    # next see Wikipedia
    assert_almost_equal(aicc(0, n, k),
                        aic(0, n, k) + 2*k*(k+1.)/(n-k-1.), decimal=14)
    assert_almost_equal(bic(0, n, k), np.log(n)*k, decimal=14)
    assert_almost_equal(hqic(0, n, k), 2*np.log(np.log(n))*k, decimal=14)
Ejemplo n.º 2
0
def test_ic():
    """Check each information criterion against its explicit penalty formula.

    Every criterion is called as ``func(0, 10, 2)`` and compared, to 14
    decimals, with the expression written out from ``n = 10`` and ``k = 2``.
    """
    n, k = 10, 2

    # (criterion, expected value when the first argument is zero)
    cases = (
        (aic, 2*k),
        # small-sample correction added to aic, see Wikipedia
        (aicc, aic(0, n, k) + 2*k*(k+1.)/(n-k-1.)),
        (bic, np.log(n)*k),
        (hqic, 2*np.log(np.log(n))*k),
    )
    for criterion, expected in cases:
        assert_almost_equal(criterion(0, 10, 2), expected, decimal=14)
def test_ic():
    """Verify the penalty terms of aic, aicc, bic and hqic.

    Each criterion is evaluated with a zero first argument, 10 observations
    and 2 parameters, and compared with the formula value to 14 decimals.
    """
    n, k = 10, 2

    # aic: twice the number of parameters
    aic_expected = 2 * k
    assert_almost_equal(aic(0, 10, 2), aic_expected, decimal=14)

    # aicc: aic plus the correction term, see Wikipedia
    correction = 2 * k * (k + 1.) / (n - k - 1.)
    aicc_expected = aic(0, n, k) + correction
    assert_almost_equal(aicc(0, 10, 2), aicc_expected, decimal=14)

    # bic: log(n) per parameter
    bic_expected = np.log(n) * k
    assert_almost_equal(bic(0, 10, 2), bic_expected, decimal=14)

    # hqic: 2 * log(log(n)) per parameter
    hqic_expected = 2 * np.log(np.log(n)) * k
    assert_almost_equal(hqic(0, 10, 2), hqic_expected, decimal=14)
Ejemplo n.º 4
0
def test_ols():
    """Compare a fitted RecursiveLS model against OLS on the same data.

    Both models are fit on ``endog`` with the single regressor ``dta['m1']``.
    The test then checks that parameters, standard errors, scale, residuals,
    log-likelihoods, forecasts, sums of squares, mean squares, t/F tests and
    AIC/BIC agree between the two sets of results.
    """
    # More comprehensive tests against OLS estimates
    mod = RecursiveLS(endog, dta['m1'])
    res = mod.fit()

    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_ols.params)
    assert_allclose(res.bse, res_ols.bse)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_ols.scale)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_ols.resid)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), then llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_ols.loglike(res_ols.params, scale=res_ols.scale)
    assert_allclose(res.llf_recursive, desired)
    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    # The standardized forecast errors (first one skipped) are rescaled by
    # the square root of the estimated observation variance before squaring.
    scale_alternative = np.sum(
        (res.standardized_forecasts_error[0, 1:] *
         res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    # Sum of Gaussian log-densities of the recursive residuals under the
    # alternative scale.
    llf_alternative = np.log(
        norm.pdf(res.resid_recursive, loc=0,
                 scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_ols.llf)

    # Prediction
    # Forecast 10 steps with a design of ones and compare with the OLS
    # prediction for a 10x1 exog of ones.
    actual = res.forecast(10, design=np.ones((1, 1, 10)))
    assert_allclose(actual, res_ols.predict(np.ones((10, 1))))

    # Sums of squares, R^2
    assert_allclose(res.ess, res_ols.ess)
    assert_allclose(res.ssr, res_ols.ssr)
    assert_allclose(res.centered_tss, res_ols.centered_tss)
    assert_allclose(res.uncentered_tss, res_ols.uncentered_tss)
    assert_allclose(res.rsquared, res_ols.rsquared)

    # Mean squares
    assert_allclose(res.mse_model, res_ols.mse_model)
    assert_allclose(res.mse_resid, res_ols.mse_resid)
    assert_allclose(res.mse_total, res_ols.mse_total)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_ols.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_ols.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_ols.aic)
    actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_bic, res_ols.bic)
Ejemplo n.º 5
0
def test_ols():
    """Check that RecursiveLS reproduces the OLS results on the same data.

    Fits RecursiveLS and OLS on ``endog`` against ``dta['m1']`` and asserts
    agreement of estimates, residuals, log-likelihoods, forecasts, summary
    statistics, hypothesis tests and information criteria.
    """
    rls_model = RecursiveLS(endog, dta['m1'])
    rls = rls_model.fit()

    ols_model = OLS(endog, dta['m1'])
    ols = ols_model.fit()

    # Coefficients and their standard errors
    for name in ('params', 'bse'):
        assert_allclose(getattr(rls, name), getattr(ols, name))

    # Estimated scale: computed according to Harvey, 1989, 4.2.5; it is
    # called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    obs_variance = rls.filter_results.obs_cov[0, 0]
    assert_allclose(obs_variance, ols.scale)

    # Smoothed forecast errors (e_t|T in Harvey, 1989, 5.4.5) equal the OLS
    # residuals, because the smoothed state holds the full-information
    # estimates of the regression coefficients.
    fitted = np.sum(rls_model['design', 0, :, :] * rls.smoothed_state, axis=0)
    smoothed_errors = rls_model.endog[:, 0] - fitted
    assert_allclose(smoothed_errors, ols.resid)

    # With scale estimated as `sum(v_t^2 / f_t) / (T - d)` (Harvey, 1989,
    # 4.2.5, p. 183), llf_recursive matches the full OLS loglikelihood,
    # i.e. the one without the scale concentrated out.
    full_llf = ols_model.loglike(ols.params, scale=ols.scale)
    assert_allclose(rls.llf_recursive, full_llf)

    # The concentrated OLS loglikelihood is obtained instead by dividing by
    # `nobs` rather than `nobs - d` when computing the scale.
    forecast_errors = (rls.standardized_forecasts_error[0, 1:] *
                       rls.filter_results.obs_cov[0, 0]**0.5)
    scale_alternative = np.sum(forecast_errors**2) / rls_model.nobs
    densities = norm.pdf(rls.resid_recursive, loc=0,
                         scale=scale_alternative**0.5)
    llf_alternative = np.log(densities).sum()
    assert_allclose(llf_alternative, ols.llf)

    # Prediction
    forecast = rls.forecast(10, design=np.ones((1, 1, 10)))
    assert_allclose(forecast, ols.predict(np.ones((10, 1))))

    # Sums of squares, R^2, then mean squares
    summary_stats = (
        'ess', 'ssr', 'centered_tss', 'uncentered_tss', 'rsquared',
        'mse_model', 'mse_resid', 'mse_total',
    )
    for name in summary_stats:
        assert_allclose(getattr(rls, name), getattr(ols, name))

    # Hypothesis tests: t-test first, then F-test, on the same restriction
    for test_name in ('t_test', 'f_test'):
        got = getattr(rls, test_name)('m1 = 0')
        want = getattr(ols, test_name)('m1 = 0')
        assert_allclose(got.statistic, want.statistic)
        assert_allclose(got.pvalue, want.pvalue, atol=1e-15)

    # Information criteria
    # The llf and llf_obs stored on the results come from the Kalman filter,
    # so the ic stored there will not be identical to the OLS versions, and
    # llf_recursive corresponds to the non-concentrated llf rather than the
    # concentrated one OLS uses by default. Recompute the criteria from
    # llf_alternative for the comparison.
    for criterion, name in ((aic, 'aic'), (bic, 'bic')):
        value = criterion(llf_alternative, rls.nobs_effective, rls.df_model)
        assert_allclose(value, getattr(ols, name))
Ejemplo n.º 6
0
 def bic(self):
     """
     (float) Bayes Information Criterion

     Evaluated by calling the `bic` function from the enclosing scope with
     the log-likelihood, the number of observations and the length of the
     first axis of `params`.
     """
     # Earlier inline form, kept for reference:
     #   -2*self.llf + self.params.shape[0]*np.log(self.nobs)
     llf, nobs, n_params = self.llf, self.nobs, self.params.shape[0]
     return bic(llf, nobs, n_params)
Ejemplo n.º 7
0
 def bic(self):
     """
     (float) Bayes Information Criterion

     Evaluated by calling the `bic` function from the enclosing scope with
     `llf`, `nobs_effective` and `df_model`.
     """
     llf, nobs, df = self.llf, self.nobs_effective, self.df_model
     return bic(llf, nobs, df)
Ejemplo n.º 8
0
 def bic(self):
     """
     (float) Bayes Information Criterion

     Delegates to the `bic` function from the enclosing scope, passing the
     log-likelihood, `nobs`, and the size of the first axis of `params`.
     """
     # Previous inline expression, for reference:
     #   -2*self.llf + self.params.shape[0]*np.log(self.nobs)
     criterion_args = (self.llf, self.nobs, self.params.shape[0])
     return bic(*criterion_args)
     good_ranks += 1
 else:
     bad_rank_subs.append(sub)
     bad_ranks.append(DV_4levels_sub_ranks)
     bad_probs.append(DV_vals2)
 # Fit four logistic regressions of `accept` for the current sub_df (full,
 # intercept only, intercept + gain, intercept + loss), add each model's BIC
 # to its running total and record the four BICs in bic_all.
 # Take an explicit copy of the column subset so that adding the intercept
 # column below does not assign into a frame derived from sub_df (this is
 # what triggers pandas' SettingWithCopyWarning).
 X = sub_df[['gain', 'loss']].copy()
 X['intercept'] = 1.0
 y = sub_df.accept
 # NOTE(review): the models are fit with missing='drop', so fewer than
 # len(y) rows may actually be used -- confirm len(y) is the intended
 # sample size for the BIC.
 n_obs = len(y)

 # Run the full model
 model_full = sm.Logit(y, X, missing='drop')
 result_full = model_full.fit()
 coefficients_full = np.array(result_full.params)
 all_coefs.append(coefficients_full)
 # Each BIC is computed once and reused for both the running total and the
 # per-subject record.
 bic_full = bic(result_full.llf, n_obs, len(coefficients_full))
 bic_score_full += bic_full

 # Run the intercept only
 model_intercept = sm.Logit(y, X['intercept'], missing='drop')
 result_intercept = model_intercept.fit()
 bic_intercept = bic(result_intercept.llf, n_obs, 1)
 bic_score_intercept += bic_intercept

 # Run intercept & gain
 model_gain = sm.Logit(y, X[['gain', 'intercept']], missing='drop')
 result_gain = model_gain.fit()
 bic_gain = bic(result_gain.llf, n_obs, 2)
 bic_score_gain += bic_gain

 # Run intercept & loss
 model_loss = sm.Logit(y, X[['loss', 'intercept']], missing='drop')
 result_loss = model_loss.fit()
 bic_loss = bic(result_loss.llf, n_obs, 2)
 bic_score_loss += bic_loss

 # Same order as before: full, intercept only, gain, loss
 bic_per_sub = [bic_full, bic_intercept, bic_gain, bic_loss]
 bic_all.append(bic_per_sub)