def test_ic():
    # test information criteria
    # consistency check
    ics = [aic, aicc, bic, hqic]
    ics_sig = [aic_sigma, aicc_sigma, bic_sigma, hqic_sigma]
    for ic, ic_sig in zip(ics, ics_sig):
        # np.float64 rather than the removed np.float alias
        assert_(ic(np.array(2), 10, 2).dtype == np.float64, msg=repr(ic))
        assert_(ic_sig(np.array(2), 10, 2).dtype == np.float64,
                msg=repr(ic_sig))

        assert_almost_equal(ic(-10. / 2. * np.log(2.), 10, 2) / 10,
                            ic_sig(2, 10, 2), decimal=14)

        assert_almost_equal(ic_sig(np.log(2.), 10, 2, islog=True),
                            ic_sig(2, 10, 2), decimal=14)

    # examples: penalty directly from the formula
    n, k = 10, 2
    assert_almost_equal(aic(0, 10, 2), 2 * k, decimal=14)
    # next see Wikipedia
    assert_almost_equal(aicc(0, 10, 2),
                        aic(0, n, k) + 2 * k * (k + 1.) / (n - k - 1.),
                        decimal=14)
    assert_almost_equal(bic(0, 10, 2), np.log(n) * k, decimal=14)
    assert_almost_equal(hqic(0, 10, 2), 2 * np.log(np.log(n)) * k, decimal=14)
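A minimal standalone sketch of the relationship the consistency check above relies on (the `_demo` names are hypothetical, written to mirror the `aic` / `aic_sigma` pair): for a Gaussian loglikelihood llf = -n/2 * log(sigma2), up to constants, ic(llf, n, k) / n equals ic_sigma(sigma2, n, k).

import numpy as np

def aic_demo(llf, nobs, df_modelwc):
    # standard Akaike information criterion: -2 llf + 2 k
    return -2. * llf + 2. * df_modelwc

def aic_sigma_demo(sigma2, nobs, df_modelwc, islog=False):
    # sigma2-based form: log(sigma2) + 2 k / n
    if not islog:
        sigma2 = np.log(sigma2)
    return sigma2 + 2. * df_modelwc / nobs

n, k, sigma2 = 10, 2, 2.0
assert np.isclose(aic_demo(-n / 2. * np.log(sigma2), n, k) / n,
                  aic_sigma_demo(sigma2, n, k))
assert np.isclose(aic_sigma_demo(np.log(sigma2), n, k, islog=True),
                  aic_sigma_demo(sigma2, n, k))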
def test_ic():
    # test information criteria
    # examples: penalty directly from the formula
    n = 10
    k = 2
    assert_almost_equal(aic(0, 10, 2), 2 * k, decimal=14)
    # next see Wikipedia
    assert_almost_equal(aicc(0, 10, 2),
                        aic(0, n, k) + 2 * k * (k + 1.) / (n - k - 1.),
                        decimal=14)
    assert_almost_equal(bic(0, 10, 2), np.log(n) * k, decimal=14)
    assert_almost_equal(hqic(0, 10, 2), 2 * np.log(np.log(n)) * k, decimal=14)
def test_ols():
    # More comprehensive tests against OLS estimates
    mod = RecursiveLS(endog, dta['m1'])
    res = mod.fit()

    mod_ols = OLS(endog, dta['m1'])
    res_ols = mod_ols.fit()

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_ols.params)
    assert_allclose(res.bse, res_ols.bse)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_ols.scale)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_ols.resid)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_ols.loglike(res_ols.params, scale=res_ols.scale)
    assert_allclose(res.llf_recursive, desired)

    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum(
        (res.standardized_forecasts_error[0, 1:] *
         res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(norm.pdf(
        res.resid_recursive, loc=0, scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_ols.llf)

    # Prediction
    actual = res.forecast(10, design=np.ones((1, 1, 10)))
    assert_allclose(actual, res_ols.predict(np.ones((10, 1))))

    # Sums of squares, R^2
    assert_allclose(res.ess, res_ols.ess)
    assert_allclose(res.ssr, res_ols.ssr)
    assert_allclose(res.centered_tss, res_ols.centered_tss)
    assert_allclose(res.uncentered_tss, res_ols.uncentered_tss)
    assert_allclose(res.rsquared, res_ols.rsquared)

    # Mean squares
    assert_allclose(res.mse_model, res_ols.mse_model)
    assert_allclose(res.mse_resid, res_ols.mse_resid)
    assert_allclose(res.mse_total, res_ols.mse_total)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_ols.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_ols.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_ols.aic)
    actual_bic = bic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_bic, res_ols.bic)
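For reference, a minimal sketch of the algebra behind `llf_alternative` (an assumption about the derivation, not code from the test): concentrating out the scale of a Gaussian regression, i.e. replacing it by its MLE ssr / nobs, collapses the loglikelihood to -n/2 * (log(2*pi*scale) + 1).

import numpy as np

def concentrated_gaussian_llf(resid):
    # concentrated (profile) Gaussian loglikelihood:
    # with scale = ssr / nobs substituted in,
    # llf = -n/2 * (log(2*pi) + log(scale) + 1)
    nobs = resid.shape[0]
    scale = np.sum(resid**2) / nobs
    return -nobs / 2. * (np.log(2 * np.pi * scale) + 1)

Under these assumptions, `concentrated_gaussian_llf(res_ols.resid)` should reproduce `res_ols.llf`, which is the concentrated loglikelihood that OLS reports by default.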
def test_glm(constraints=None):
    # More comprehensive tests against GLM estimates (this is somewhat
    # redundant given `test_ols`, but mostly complements the tests in
    # `test_glm_constrained`)
    endog = dta.infl
    exog = add_constant(dta[['unemp', 'm1']])

    mod = RecursiveLS(endog, exog, constraints=constraints)
    res = mod.fit()

    mod_glm = GLM(endog, exog)
    if constraints is None:
        res_glm = mod_glm.fit()
    else:
        res_glm = mod_glm.fit_constrained(constraints=constraints)

    # Regression coefficients, standard errors, and estimated scale
    assert_allclose(res.params, res_glm.params)
    assert_allclose(res.bse, res_glm.bse, atol=1e-6)
    # Note: scale here is computed according to Harvey, 1989, 4.2.5, and is
    # called the ML estimator and sometimes (e.g. later in section 5)
    # denoted \tilde \sigma_*^2
    assert_allclose(res.filter_results.obs_cov[0, 0], res_glm.scale)

    # DoF
    # Note: GLM does not include the intercept in DoF, so modify by -1
    assert_equal(res.df_model - 1, res_glm.df_model)

    # OLS residuals are equivalent to smoothed forecast errors
    # (the latter are defined as e_t|T by Harvey, 1989, 5.4.5)
    # (this follows since the smoothed state simply contains the
    # full-information estimates of the regression coefficients)
    actual = (mod.endog[:, 0] -
              np.sum(mod['design', 0, :, :] * res.smoothed_state, axis=0))
    assert_allclose(actual, res_glm.resid_response, atol=1e-7)

    # Given the estimate of scale as `sum(v_t^2 / f_t) / (T - d)` (see
    # Harvey, 1989, 4.2.5 on p. 183), llf_recursive is equivalent to the
    # full OLS loglikelihood (i.e. without the scale concentrated out).
    desired = mod_glm.loglike(res_glm.params, scale=res_glm.scale)
    assert_allclose(res.llf_recursive, desired)

    # Alternatively, we can construct the concentrated OLS loglikelihood
    # by computing the scale term with `nobs` in the denominator rather than
    # `nobs - d`.
    scale_alternative = np.sum(
        (res.standardized_forecasts_error[0, 1:] *
         res.filter_results.obs_cov[0, 0]**0.5)**2) / mod.nobs
    llf_alternative = np.log(norm.pdf(
        res.resid_recursive, loc=0, scale=scale_alternative**0.5)).sum()
    assert_allclose(llf_alternative, res_glm.llf)

    # Prediction
    # TODO: prediction in the constrained case is not working.
    if constraints is None:
        design = np.ones((1, 3, 10))
        actual = res.forecast(10, design=design)
        assert_allclose(actual, res_glm.predict(np.ones((10, 3))))
    else:
        design = np.ones((2, 3, 10))
        assert_raises(NotImplementedError, res.forecast, 10, design=design)

    # Hypothesis tests
    actual = res.t_test('m1 = 0')
    desired = res_glm.t_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue, atol=1e-15)

    actual = res.f_test('m1 = 0')
    desired = res_glm.f_test('m1 = 0')
    assert_allclose(actual.statistic, desired.statistic)
    assert_allclose(actual.pvalue, desired.pvalue)

    # Information criteria
    # Note: the llf and llf_obs given in the results are based on the Kalman
    # filter and so the ic given in results will not be identical to the
    # OLS versions. Additionally, llf_recursive is comparable to the
    # non-concentrated llf, and not the concentrated llf that is by default
    # used in OLS. Compute new ic based on llf_alternative to compare.
    actual_aic = aic(llf_alternative, res.nobs_effective, res.df_model)
    assert_allclose(actual_aic, res_glm.aic)
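A hedged usage sketch: the `constraints` parameter suggests the test is run both unconstrained and with a linear restriction (the particular restriction here is illustrative, not taken from the source).

test_glm()                       # compare RecursiveLS against GLM.fit()
test_glm(constraints='m1 = 0')   # compare against GLM.fit_constrained()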
def aic(self):
    """
    (float) Akaike Information Criterion
    """
    # return -2 * self.llf + 2 * self.params.shape[0]
    return aic(self.llf, self.nobs, self.params.shape[0])
# (snippet begins mid-formula: the opening of the call that builds `model`
#  is truncated in the source)
                  '+ C(Thyroid_medication)'
                  '+ C(Drink_alcohol_more_than_once_a_week)'
                  '+ N_previous_admissions'
                  '+ PSQI_sleep_quality_index_global_score'
                  '+ Mean_right_hippocampal_volume'
                  '+ Intracranial_volume'
                  '+ Cholesterol'
                  '+ C(Sex)'
                  '+ Age'
                  '+ C(Smoke)'
                  '+ BMI'
                  '+ med_index',
                  data=logit_df)
result = model.fit()

main_effect_aic = aic(llf=-152.36, nobs=322, df_modelwc=22)

print(result.summary())
print(np.exp(result.params))
print(np.exp(result.conf_int()))
print(main_effect_aic)

# Getting a dataframe of results
results_df = logit_results_to_dataframe(results=result)
logit_p_vals = result.pvalues

# Adding in corrected p-values
corrected_log_p_vals = multipletests(pvals=logit_p_vals, alpha=0.05,
                                     method='fdr_bh')
corrected_log_p_vals = list(corrected_log_p_vals[1])
results_df['fdr_pvals'] = corrected_log_p_vals

# Re-ordering the output and exporting
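A note on the `multipletests` call above: it returns a tuple whose second element (index 1) holds the corrected p-values, which is why the snippet takes `corrected_log_p_vals[1]`. A minimal sketch of that usage:

import numpy as np
from statsmodels.stats.multitest import multipletests

pvals = np.array([0.01, 0.04, 0.30])
reject, pvals_corrected, _, _ = multipletests(pvals, alpha=0.05,
                                              method='fdr_bh')
# fdr_bh scales each p-value by n / rank, enforcing monotonicity,
# so here pvals_corrected is [0.03, 0.06, 0.30]
print(reject, pvals_corrected)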
def aic(self):
    """
    (float) Akaike Information Criterion
    """
    return aic(self.llf, self.nobs_effective, self.df_model)