def test_glm(self):
    """Preliminary check of ``GLM.get_prediction`` against the WLS results.

    A GLM is fit on ``endog`` and ``exog`` scaled by the square root of the
    WLS weights. The first 30 rows of its prediction summary frame are
    compared with the first four columns of the WLS prediction summary
    frame, once with ``use_t=True`` and once with the normal distribution.
    The method then checks ``params_transform_univariate`` with its default
    link and with the identity link, and finally verifies that predicting
    with an explicit ``exog`` does not raise.
    """
    from statsmodels.genmod._prediction import params_transform_univariate
    from statsmodels.genmod.families import links
    from statsmodels.genmod.generalized_linear_model import GLM

    wls_results = self.res_wls
    wls_model = wls_results.model
    endog, exog = wls_model.endog, wls_model.exog

    # Notation from the original test: `wi` is weights, `w` is var.
    root_weights = np.sqrt(wls_model.weights)
    glm_model = GLM(endog * root_weights, exog * root_weights[:, None])

    # Only the leading rows are compared (original note: "in glm with
    # predict wendog").
    n_rows = 30

    # Both models use the t distribution.
    glm_results = glm_model.fit(use_t=True)
    glm_frame = glm_results.get_prediction().summary_frame()
    wls_frame = wls_results.get_prediction().summary_frame()
    assert_allclose(glm_frame.values[:n_rows],
                    wls_frame.values[:n_rows, :4])

    # Both models use the normal distribution (GLM default use_t=False).
    glm_results = glm_model.fit()
    glm_frame = glm_results.get_prediction().summary_frame()
    wls_results = wls_model.fit(use_t=False)
    wls_frame = wls_results.get_prediction().summary_frame()
    assert_allclose(glm_frame.values[:n_rows],
                    wls_frame.values[:n_rows, :4])

    # Function for parameter transformation.
    # TODO: this should be a separate test method.
    rates = params_transform_univariate(glm_results.params,
                                        glm_results.cov_params())
    exp_params = np.exp(glm_results.params)
    expected_rates = np.column_stack((exp_params,
                                      glm_results.bse * exp_params,
                                      np.exp(glm_results.conf_int())))
    assert_allclose(rates.summary_frame().values, expected_rates,
                    rtol=1e-13)

    # With the identity transform the results must match the GLM results.
    identity_res = params_transform_univariate(glm_results.params,
                                               glm_results.cov_params(),
                                               link=links.identity())
    assert_allclose(identity_res.tvalues, glm_results.tvalues, rtol=1e-13)
    assert_allclose(identity_res.se_mean, glm_results.bse, rtol=1e-13)
    t_test_res = identity_res.t_test()
    assert_allclose(t_test_res[0], glm_results.tvalues, rtol=1e-13)
    assert_allclose(t_test_res[1], glm_results.pvalues, rtol=1e-13)

    # Prediction with exog and no weights does not error.
    glm_results = glm_model.fit()
    glm_results.get_prediction(exog)
def params_transform_univariate(params, cov_params, link=None, transform=None,
                                row_labels=None):
    """
    Results for univariate, nonlinear, monotonically transformed parameters.

    This provides transformed values, standard errors and confidence interval
    for transformations of parameters, for example in calculating rates with
    `exp(params)` in the case of Poisson or other models with exponential
    mean function.

    Parameters
    ----------
    params : array_like
        Parameter estimates. If the object has an ``index`` attribute and
        ``row_labels`` is None, that index is used as row labels.
    cov_params : array_like
        Covariance matrix of the parameter estimates. Only its diagonal is
        used.
    link : link instance, optional
        Object providing ``inverse`` and ``inverse_deriv``. The transformed
        values are ``link.inverse(params)``. If both ``link`` and
        ``transform`` are None, ``links.Log()`` is used.
    transform : object, optional
        Not supported yet. It is ignored when ``link`` is given.
    row_labels : optional
        Row labels passed on to the returned ``PredictionResults``.

    Returns
    -------
    PredictionResults
        Results for the transformed parameters. Results for the
        untransformed parameters are attached through its ``linpred``
        argument.

    Raises
    ------
    NotImplementedError
        If ``transform`` is given without ``link``. Previously this case
        failed with an AttributeError on ``None``.
    """

    from statsmodels.genmod.families import links

    if link is None:
        if transform is not None:
            # `transform` is never used below, so without a link there is
            # nothing to apply; fail with an explicit message.
            raise NotImplementedError(
                "transform is not supported yet, use link instead")
        link = links.Log()

    if row_labels is None and hasattr(params, 'index'):
        row_labels = params.index

    params = np.asarray(params)
    var_params = np.diag(cov_params)

    predicted_mean = link.inverse(params)
    link_deriv = link.inverse_deriv(params)
    # Variance of the transformed parameters: squared derivative of the
    # inverse link times the parameter variance.
    var_pred_mean = link_deriv**2 * var_params
    # TODO: do we want covariance also, or just var/se

    dist = stats.norm

    # TODO: need ci for linear prediction, method of `lin_pred
    linpred = PredictionResults(params, var_params, dist=dist,
                                row_labels=row_labels, link=links.identity())

    res = PredictionResults(predicted_mean, var_pred_mean, dist=dist,
                            row_labels=row_labels, linpred=linpred, link=link)

    return res
def test_glm(self):
    """Preliminary check of ``GLM.get_prediction`` against the WLS results.

    A GLM is fit on ``endog`` and ``exog`` scaled by the square root of the
    WLS weights. The first 30 rows of its prediction summary frame are
    compared with the first four columns of the WLS prediction summary
    frame, once with ``use_t=True`` and once with the normal distribution.
    The method then checks ``params_transform_univariate`` with its default
    link and with the identity link, verifies that predicting with an
    explicit ``exog`` does not raise, and checks that list input to
    ``get_prediction`` gives the same results as array input (issue 4437).
    """
    from statsmodels.genmod._prediction import params_transform_univariate
    from statsmodels.genmod.families import links
    from statsmodels.genmod.generalized_linear_model import GLM

    wls_results = self.res_wls
    wls_model = wls_results.model
    endog, exog = wls_model.endog, wls_model.exog

    # Notation from the original test: `wi` is weights, `w` is var.
    root_weights = np.sqrt(wls_model.weights)
    glm_model = GLM(endog * root_weights, exog * root_weights[:, None])

    # Only the leading rows are compared (original note: "in glm with
    # predict wendog").
    n_rows = 30

    # Both models use the t distribution.
    glm_results = glm_model.fit(use_t=True)
    glm_frame = glm_results.get_prediction().summary_frame()
    wls_frame = wls_results.get_prediction().summary_frame()
    assert_allclose(glm_frame.values[:n_rows],
                    wls_frame.values[:n_rows, :4])

    # Both models use the normal distribution (GLM default use_t=False).
    glm_results = glm_model.fit()
    glm_frame = glm_results.get_prediction().summary_frame()
    wls_results = wls_model.fit(use_t=False)
    wls_frame = wls_results.get_prediction().summary_frame()
    assert_allclose(glm_frame.values[:n_rows],
                    wls_frame.values[:n_rows, :4])

    # Function for parameter transformation.
    # TODO: this should be a separate test method.
    rates = params_transform_univariate(glm_results.params,
                                        glm_results.cov_params())
    exp_params = np.exp(glm_results.params)
    expected_rates = np.column_stack((exp_params,
                                      glm_results.bse * exp_params,
                                      np.exp(glm_results.conf_int())))
    assert_allclose(rates.summary_frame().values, expected_rates,
                    rtol=1e-13)

    # With the identity transform the results must match the GLM results.
    identity_res = params_transform_univariate(glm_results.params,
                                               glm_results.cov_params(),
                                               link=links.identity())
    assert_allclose(identity_res.tvalues, glm_results.tvalues, rtol=1e-13)
    assert_allclose(identity_res.se_mean, glm_results.bse, rtol=1e-13)
    t_test_res = identity_res.t_test()
    assert_allclose(t_test_res[0], glm_results.tvalues, rtol=1e-13)
    assert_allclose(t_test_res[1], glm_results.pvalues, rtol=1e-13)

    # Prediction with exog and no weights does not error.
    glm_results = glm_model.fit()
    glm_results.get_prediction(exog)

    # Check that list input works, issue 4437: once for the column means
    # of the model exog and once for its last two rows.
    glm_exog = glm_results.model.exog
    cases = (
        (glm_exog.mean(0), [0]),
        (glm_exog[-2:], [0, 1]),
    )
    for x, expected_index in cases:
        pred_from_array = glm_results.get_prediction(x)
        ci_from_array = pred_from_array.conf_int()
        pred_from_list = glm_results.get_prediction(x.tolist())
        ci_from_list = pred_from_list.conf_int()

        assert_allclose(pred_from_list.se_mean, pred_from_array.se_mean,
                        rtol=1e-13)
        assert_allclose(ci_from_list, ci_from_array, rtol=1e-13)
        list_frame = pred_from_list.summary_frame()
        assert_equal(list_frame.index.values, expected_index)
# Fit a GLM on endog and exog divided by the square root of `w`.
w_sqrt = np.sqrt(w)
mod_glm = GLM(y / w_sqrt, X / w_sqrt[:, None])

# Prediction summary with the default fit options.
res_glm = mod_glm.fit()
pred_glm = res_glm.get_prediction()
sf_head = pred_glm.summary_frame().head()
print(sf_head)

# Prediction summary based on the t distribution.
res_glm_t = mod_glm.fit(use_t=True)
pred_glm_t = res_glm_t.get_prediction()
sf_head_t = pred_glm_t.summary_frame().head()
print(sf_head_t)

# Transformed parameters with the default link of the helper.
glm_params = res_glm.params
rates = params_transform_univariate(glm_params, res_glm.cov_params())
print('\nRates exp(params)')
print(rates.summary_frame())

# Reference values computed directly from the GLM results.
exp_params = np.exp(glm_params)
rates2 = np.column_stack((exp_params,
                          res_glm.bse * exp_params,
                          np.exp(res_glm.conf_int())))
assert_allclose(rates.summary_frame().values, rates2, rtol=1e-13)

from statsmodels.genmod.families import links

# With the identity transform the results must match the GLM results.
pt = params_transform_univariate(glm_params, res_glm.cov_params(),
                                 link=links.identity())
print(pt.tvalues)

assert_allclose(pt.tvalues, res_glm.tvalues, rtol=1e-13)
assert_allclose(pt.se_mean, res_glm.bse, rtol=1e-13)
ptt = pt.t_test()
assert_allclose(ptt[0], res_glm.tvalues, rtol=1e-13)
assert_allclose(ptt[1], res_glm.pvalues, rtol=1e-13)
# Prediction summary with the default fit options.
res_glm = mod_glm.fit()
pred_glm = res_glm.get_prediction()
sf_head = pred_glm.summary_frame().head()
print(sf_head)

# Prediction summary based on the t distribution.
res_glm_t = mod_glm.fit(use_t=True)
pred_glm_t = res_glm_t.get_prediction()
sf_head_t = pred_glm_t.summary_frame().head()
print(sf_head_t)

# Transformed parameters with the default link of the helper.
glm_params = res_glm.params
rates = params_transform_univariate(glm_params, res_glm.cov_params())
print('\nRates exp(params)')
print(rates.summary_frame())

# Reference values computed directly from the GLM results.
exp_params = np.exp(glm_params)
rates2 = np.column_stack((exp_params,
                          res_glm.bse * exp_params,
                          np.exp(res_glm.conf_int())))
assert_allclose(rates.summary_frame().values, rates2, rtol=1e-13)

from statsmodels.genmod.families import links

# With the identity transform the results must match the GLM results.
pt = params_transform_univariate(glm_params, res_glm.cov_params(),
                                 link=links.identity())
print(pt.tvalues)

assert_allclose(pt.tvalues, res_glm.tvalues, rtol=1e-13)
assert_allclose(pt.se_mean, res_glm.bse, rtol=1e-13)
ptt = pt.t_test()
assert_allclose(ptt[0], res_glm.tvalues, rtol=1e-13)
assert_allclose(ptt[1], res_glm.pvalues, rtol=1e-13)