def setup_class(cls):
    """Fit a binomial-family GLM on ``data_bin`` and cache its influence objects.

    Two influence instances are stored on the class for the tests to use:
    ``infl1`` is what the fitted results return from ``get_influence()``,
    ``infl0`` is constructed directly via ``MLEInfluence``.
    """
    exog_columns = ['const', 'log_rate', 'log_volumne']
    model = GLM(
        data_bin['constrict'],
        data_bin[exog_columns],
        family=families.Binomial(),
    )
    # Fit options are passed through unchanged (WLS results attached,
    # atol=1e-10).
    fit_res = model.fit(attach_wls=True, atol=1e-10)

    cls.infl1 = fit_res.get_influence()
    cls.infl0 = MLEInfluence(fit_res)
def setup_class(cls):
    """Fit a binomial-family GLM with a two-column endog and cache influence.

    The endog stacks ``yi`` next to ``ni - yi`` with ``ni`` fixed at 40, and
    the exog is a column of ones plus the sequence 1..len(yi).  The class
    receives ``infl1`` (from ``get_influence()``), ``infl0`` (built with
    ``MLEInfluence``) and the tolerance attribute ``cd_rtol``.
    """
    counts = np.array([0, 2, 14, 19, 30])
    n_obs = len(counts)
    totals = 40 * np.ones(n_obs)

    # Design matrix: constant column followed by 1, 2, ..., n_obs.
    regressor = np.arange(1, n_obs + 1)
    design = np.column_stack((np.ones(n_obs), regressor))

    # Two-column response: the counts and their complement w.r.t. the totals.
    response = np.column_stack((counts, totals - counts))

    fit_res = GLM(response, design, family=families.Binomial()).fit()

    cls.infl1 = fit_res.get_influence()
    cls.infl0 = MLEInfluence(fit_res)
    cls.cd_rtol = 5e-5
def test_influence_glm_bernoulli():
    """Compare GLM influence measures with the stored SAS reference table.

    The example uses Finney's data and is used in Pregibon 1981.  Columns of
    ``results_sas_df`` referenced here: 4 (hat matrix diagonal), 5:8
    (dfbetas), 8 and 9 (compared against scaled Cook's distance and its
    ``(1 - h)``-weighted variant).
    """
    sas = np.asarray(results_sas_df)
    sas_dfbetas = sas[:, 5:8]

    exog_columns = ['const', 'log_rate', 'log_volumne']
    model = GLM(data_bin['constrict'], data_bin[exog_columns],
                family=families.Binomial())
    fit_res = model.fit(attach_wls=True, atol=1e-10)
    infl = fit_res.get_influence(observed=False)

    k_vars = 3

    # Parameter-change measures: standardized, then rescaled by the bse.
    assert_allclose(infl.dfbetas, sas_dfbetas, atol=1e-4)
    assert_allclose(infl.d_params, sas_dfbetas * fit_res.bse.values,
                    atol=1e-4)

    # The reference column holds Cook's distance multiplied by k_vars.
    cooks_scaled = infl.cooks_distance[0] * k_vars
    assert_allclose(cooks_scaled, sas[:, 8], atol=6e-5)

    hat_diag = infl.hat_matrix_diag
    assert_allclose(hat_diag, sas[:, 4], atol=6e-5)

    # Same scaled Cook's distance, additionally weighted by (1 - h_ii).
    c_bar = cooks_scaled * (1 - hat_diag)
    assert_allclose(c_bar, sas[:, 9], atol=6e-5)
#
# These measures are based on a one-step approximation to the results
# for deleting one observation. One-step approximations are usually accurate
# for small changes but underestimate the magnitude of large changes. Even
# though large changes are underestimated, they still clearly show the
# effect of influential observations.
#
# In this example, observations 4 and 18 have a large standardized residual
# and a large Cook's distance, but not a large leverage. Observation 13 has
# the largest leverage but only a small Cook's distance and not a large
# studentized residual.
#
# Only the two observations 4 and 18 have a large impact on the parameter
# estimates.

infl = res.get_influence(observed=False)

# Summary table, shown sorted by the "cooks_d" column in descending order and
# cut to the first ten rows.
summ_df = infl.summary_frame()
summ_df.sort_values(by="cooks_d", ascending=False)[:10]

fig = infl.plot_influence()
fig.tight_layout(pad=1.0)

# Index plot of Cook's distance; the threshold is twice its mean.
fig = infl.plot_index(
    y_var="cooks",
    threshold=2 * infl.cooks_distance[0].mean(),
)
fig.tight_layout(pad=1.0)

fig = infl.plot_index(
    y_var="resid",
    threshold=1,
)
fig.tight_layout(pad=1.0)

fig = infl.plot_index(
    y_var="dfbeta",
    idx=1,
    threshold=0.5,
)
#
# These measures are based on a one-step approximation to the results
# for deleting one observation. One-step approximations are usually accurate
# for small changes but underestimate the magnitude of large changes. Even
# though large changes are underestimated, they still clearly show the
# effect of influential observations.
#
# In this example, observations 4 and 18 have a large standardized residual
# and a large Cook's distance, but not a large leverage. Observation 13 has
# the largest leverage but only a small Cook's distance and not a large
# studentized residual.
#
# Only the two observations 4 and 18 have a large impact on the parameter
# estimates.

infl = res.get_influence(observed=False)

# Summary table, shown sorted by the 'cooks_d' column in descending order and
# cut to the first ten rows.
summ_df = infl.summary_frame()
summ_df.sort_values(by='cooks_d', ascending=False)[:10]

infl.plot_influence()

# Index plot of Cook's distance; the threshold is twice its mean.
infl.plot_index(
    y_var='cooks',
    threshold=2 * infl.cooks_distance[0].mean(),
)

infl.plot_index(
    y_var='resid',
    threshold=1,
)

# One dfbeta index plot per parameter position, in the order 1, 2, 0.
infl.plot_index(
    y_var='dfbeta',
    idx=1,
    threshold=0.5,
)
infl.plot_index(
    y_var='dfbeta',
    idx=2,
    threshold=0.5,
)
infl.plot_index(
    y_var='dfbeta',
    idx=0,
    threshold=0.5,
)