def setup_class(cls):
    """Fit Poisson with cluster-robust covariance requested directly in ``fit``.

    The covariance is rescaled through ``scaling_factor`` so that the
    default ``cov_params`` lines up with the Stata reference in ``cls.res2``;
    ``corr_fact`` is therefore set to 1.  ``normalized_cov_params`` is
    cleared afterwards so the inherited checks run without it.
    """
    cls.res2 = results_st.results_poisson_clu
    model = smd.Poisson(endog, exog)

    # scaling of cov_params_default to match Stata
    # TODO should the default be changed?
    n_obs, n_params = model.exog.shape
    stata_scale = (n_obs - 1.) / float(n_obs - n_params)

    cluster_kwds = dict(
        groups=group,
        use_correction=True,
        scaling_factor=1. / stata_scale,
        df_correction=True,  # TODO has no effect
    )
    cls.res1 = model.fit(
        disp=False,
        cov_type='cluster',
        cov_kwds=cluster_kwds,
        use_t=False,
    )

    # The model results, t_test, ... should also work without
    # normalized_cov_params, see #2209
    # Note: we cannot set on the wrapper res1, we need res1._results
    cls.res1._results.normalized_cov_params = None

    cls.bse_rob = cls.res1.bse

    # backwards compatibility with inherited test methods
    cls.corr_fact = 1
def setup_class(cls):
    """Fit Poisson, then attach cluster-robust covariance to the same results.

    Sets ``res1`` (fitted results), ``res2`` (reference results),
    ``bse_rob`` (standard errors read after the robust covariance call) and
    ``corr_fact`` (from ``cls.get_correction_factor``).

    The former ``debug = False`` branch was unreachable and has been removed.
    """
    cls.res2 = results_st.results_poisson_clu
    mod = smd.Poisson(endog, exog)
    cls.res1 = mod.fit(disp=False)

    from statsmodels.base.covtype import get_robustcov_results

    # The return value is not used: bse is read from cls.res1 afterwards, so
    # use_self=True presumably updates the passed results instance in place
    # -- confirm against the statsmodels documentation.
    get_robustcov_results(
        cls.res1._results,
        'cluster',
        groups=group,
        use_correction=True,
        df_correction=True,  # TODO has no effect
        use_t=False,
        use_self=True,
    )
    cls.bse_rob = cls.res1.bse
    cls.corr_fact = cls.get_correction_factor(cls.res1)
def setup_class(cls):
    """Fit Poisson, attach cluster-robust covariance, compute ``corr_fact``.

    Sets ``res1`` (fitted results), ``res2`` (reference results),
    ``bse_rob`` (standard errors read after the robust covariance call) and
    ``corr_fact`` = sqrt((nobs - 1) / (nobs - k_params)).

    The former ``debug = False`` branch was unreachable and has been removed,
    together with the unused ``k_vars`` local.
    """
    cls.res2 = results_st.results_poisson_clu
    mod = smd.Poisson(endog, exog)
    cls.res1 = res1 = mod.fit(disp=False)

    from statsmodels.base.covtype import get_robustcov_results

    # The return value is not used: bse is read from cls.res1 afterwards, so
    # use_self=True presumably updates the passed results instance in place
    # -- confirm against the statsmodels documentation.
    get_robustcov_results(
        cls.res1._results,
        'cluster',
        groups=group,
        use_correction=True,
        df_correction=True,  # TODO has no effect
        use_t=False,
        use_self=True,
    )
    cls.bse_rob = cls.res1.bse

    nobs = res1.model.exog.shape[0]
    k_params = len(res1.params)
    corr_fact = (nobs - 1.) / float(nobs - k_params)
    # for bse we need sqrt of correction factor
    cls.corr_fact = np.sqrt(corr_fact)
def setup_class(cls):
    """Fit an unweighted Poisson GLM on ``cpunish_data``.

    Sets ``res1`` to the fitted GLM results and ``res2`` to the stored
    reference ``res_stata.results_poisson_none_nonrobust``.

    A ``discrete.Poisson`` model used to be constructed here but was never
    fitted or read, so it has been dropped.
    """
    cls.res1 = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        family=sm.families.Poisson(),
    ).fit()
    cls.res2 = res_stata.results_poisson_none_nonrobust
def setup_class(cls):
    """Fit Poisson with exposure and ``HC1`` covariance requested in ``fit``.

    Sets ``res1``, ``res2`` (reference results), ``bse_rob`` and
    ``corr_fact`` (from ``cls.get_correction_factor`` with
    ``sub_kparams=False``).
    """
    cls.res2 = results_st.results_poisson_exposure_hc1

    model = smd.Poisson(endog, exog, exposure=exposure)
    fitted = model.fit(disp=False, cov_type='HC1')

    cls.res1 = fitted
    cls.bse_rob = cls.res1.bse
    cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
def setup_class(cls):
    """Fit a Poisson GLM on ``cpunish_data`` with frequency weights.

    Sets ``res1`` to the fitted GLM results and ``res2`` to the stored
    reference ``res_stata.results_poisson_fweight_nonrobust``.

    A ``discrete.Poisson`` model used to be constructed here but was never
    fitted or read, so it has been dropped.
    """
    fweights = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    cls.res1 = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        family=sm.families.Poisson(),
        freq_weights=fweights,
    ).fit()
    cls.res2 = res_stata.results_poisson_fweight_nonrobust
def setup_class(cls):
    """Fit Poisson with exposure and ``HC1`` covariance; compute ``corr_fact``.

    ``corr_fact`` is sqrt(1 / (nobs / (nobs - 1))), i.e. the factor applied
    to standard errors by the inherited checks.
    """
    cls.res2 = results_st.results_poisson_exposure_hc1

    model = smd.Poisson(endog, exog, exposure=exposure)
    cls.res1 = model.fit(disp=False, cov_type='HC1')
    cls.bse_rob = cls.res1.bse

    n_obs, _ = model.exog.shape
    ratio = (n_obs) / float(n_obs - 1.)
    # for bse we need sqrt of correction factor
    cls.corr_fact = np.sqrt(1. / ratio)
def setup_class(cls):
    """Fit Poisson, then attach ``HC1`` covariance to the same results.

    Sets ``res1``, ``res2`` (reference results), ``bse_rob`` and
    ``corr_fact`` (from ``cls.get_correction_factor`` with
    ``sub_kparams=False``).
    """
    from statsmodels.base.covtype import get_robustcov_results

    cls.res2 = results_st.results_poisson_hc1
    cls.res1 = smd.Poisson(endog, exog).fit(disp=False)

    # Return value is discarded; bse is read from cls.res1 afterwards.
    get_robustcov_results(cls.res1._results, 'HC1', use_self=True)

    cls.bse_rob = cls.res1.bse
    cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
def setup_class(cls):
    """Fit GLM-Poisson and discrete Poisson on simulated counts with ``HC0``.

    Counts are drawn with ``np.random.poisson(endog)`` after seeding the
    global NumPy RNG.  ``res1`` holds the GLM results, ``res2`` the
    discrete-model results, and ``res1.rtol`` is set to 1e-11.
    """
    np.random.seed(987125643)  # not intentional seed
    endog_count = np.random.poisson(endog)

    cls.cov_type = 'HC0'

    glm_model = GLM(endog_count, exog, family=families.Poisson())
    cls.res1 = glm_model.fit(cov_type='HC0')

    discrete_model = smd.Poisson(endog_count, exog)
    cls.res2 = discrete_model.fit(cov_type='HC0')

    cls.res1.rtol = 1e-11
def setup_class(cls):
    """Fit Poisson, attach ``HC1`` covariance in place, compute ``corr_fact``.

    ``corr_fact`` is sqrt(1 / (nobs / (nobs - 1))), i.e. the factor applied
    to standard errors by the inherited checks.
    """
    from statsmodels.base.covtype import get_robustcov_results

    cls.res2 = results_st.results_poisson_hc1
    model = smd.Poisson(endog, exog)
    cls.res1 = model.fit(disp=False)

    # Return value is discarded; bse is read from cls.res1 afterwards.
    get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
    cls.bse_rob = cls.res1.bse

    n_obs, _ = model.exog.shape
    ratio = (n_obs) / float(n_obs - 1.)
    # for bse we need sqrt of correction factor
    cls.corr_fact = np.sqrt(1. / ratio)
def setupClass(cls):
    """Fit a Poisson GLM on ``cpunish_data`` with normalized frequency weights.

    The weights are ``fweights / fweights.sum() * nobs`` and are passed as
    ``freq_weights`` (faking aweights by using normalized freq_weights).
    Sets ``res1`` to the fitted GLM results and ``res2`` to the stored
    reference ``res_stata.results_poisson_aweight_nonrobust``.

    A ``discrete.Poisson`` model used to be constructed here but was never
    fitted or read, so it has been dropped, as has the ``self = cls`` alias.
    """
    fweights = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    # faking aweights by using normalized freq_weights
    wsum = fweights.sum()
    nobs = len(cpunish_data.endog)
    aweights = fweights / wsum * nobs

    cls.res1 = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        family=sm.families.Poisson(),
        freq_weights=aweights,
    ).fit()
    cls.res2 = res_stata.results_poisson_aweight_nonrobust
def __init__(self):
    """Set up the Poisson-family GLM test with canonical log link.

    Test results were obtained by R.

    Loads the cpunish dataset, log-transforms exog column 3, appends a
    constant column, and fits both a GLM (``res1``) and a discrete Poisson
    model (``resd``) for comparison; ``res2`` holds the reference results.
    """
    from statsmodels.datasets.cpunish import load
    from .results.results_glm import Cpunish

    self.data = load()
    self.data.exog[:, 3] = np.log(self.data.exog[:, 3])
    self.data.exog = add_constant(self.data.exog, prepend=False)

    glm_model = GLM(
        self.data.endog,
        self.data.exog,
        family=sm.families.Poisson(),
    )
    self.res1 = glm_model.fit()
    self.res2 = Cpunish()

    # compare with discrete, start close to save time
    discrete_model = discrete.Poisson(self.data.endog, self.data.exog)
    near_optimum = self.res1.params * 0.9
    self.resd = discrete_model.fit(start_params=near_optimum, disp=False)
def setup_class(cls):
    """Fit Poisson with exposure, then attach cluster-robust covariance.

    Sets ``res1``, ``res2`` (reference results), ``bse_rob`` and
    ``corr_fact`` (from ``cls.get_correction_factor``).
    """
    from statsmodels.base.covtype import get_robustcov_results

    cls.res2 = results_st.results_poisson_exposure_clu  # nonrobust
    model = smd.Poisson(endog, exog, exposure=exposure)
    cls.res1 = model.fit(disp=False)

    # Return value is discarded; bse is read from cls.res1 afterwards.
    get_robustcov_results(
        cls.res1._results,
        'cluster',
        groups=group,
        use_correction=True,
        df_correction=True,  # TODO has no effect
        use_t=False,
        use_self=True,
    )
    cls.bse_rob = cls.res1.bse  # sw.se_cov(cov_clu)
    cls.corr_fact = cls.get_correction_factor(cls.res1)
def fit_data(data, explained, explanatory, poisson=False):
    """Fit a response variable (e.g. intensity) to a list of explanatory variables

    The fitting is run twice, restricting to the significant explanatory
    variables (as selected by ``fit_significant`` from the first run's
    results) in the second run.

    Parameters
    ----------
    data : DataFrame { year, `explained`, `explanatory`, ... }
        An intercept column is added automatically.
    explained : str
        Name of explained variable, e.g. 'intensity'.
    explanatory : list of str
        Names of explanatory variables, e.g. ['gmt','esoi'].
    poisson : boolean
        Optionally, use Poisson regression for fitting.
        If False (default), uses ordinary least squares (OLS) regression.

    Returns
    -------
    sm_results : list of two statsmodels Results objects
        Results for first and second run, in that order.
    """
    d_explained = data[explained]
    d_explanatory = data[explanatory]

    # for the first run, assume that all variables are significant
    significant = explanatory

    sm_results = []
    for _ in range(2):
        # restrict to variables with significant relationship; take an
        # explicit copy so that adding the intercept column below does not
        # write into a selection of another frame (SettingWithCopyWarning)
        d_explanatory = d_explanatory[significant].copy()

        # add column for intercept
        d_explanatory['const'] = 1.0

        if poisson:
            mod = smd.Poisson(d_explained, d_explanatory)
            res = mod.fit(maxiter=100, disp=0, cov_type='HC1')
        else:
            mod = sm.OLS(d_explained, d_explanatory)
            res = mod.fit(maxiter=100, disp=0, cov_type='HC1', use_t=True)
        significant = fit_significant(res)
        sm_results.append(res)

    return sm_results
def setup_class(cls):
    """Fit a Poisson GLM on ``cpunish_data`` with normalized variance weights.

    The weights are ``fweights / fweights.sum() * nobs`` and are passed as
    ``var_weights``.  ``res2`` is a copy of the stored reference
    ``res_stata.results_poisson_aweight_nonrobust`` whose ``resids`` columns
    3:5 are multiplied by ``sqrt(aweights)``.

    A ``discrete.Poisson`` model used to be constructed here but was never
    fitted or read, so it has been dropped.
    """
    fweights = np.array([1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    # faking aweights by using normalized freq_weights
    wsum = fweights.sum()
    nobs = len(cpunish_data.endog)
    aweights = fweights / wsum * nobs

    cls.res1 = GLM(
        cpunish_data.endog,
        cpunish_data.exog,
        family=sm.families.Poisson(),
        var_weights=aweights,
    ).fit()

    # Need to copy to avoid inplace adjustment
    from copy import copy
    cls.res2 = copy(res_stata.results_poisson_aweight_nonrobust)
    cls.res2.resids = cls.res2.resids.copy()

    # Need to adjust resids for pearson and deviance to add weights
    cls.res2.resids[:, 3:5] *= np.sqrt(aweights[:, np.newaxis])
def setup_class(cls):
    """Fit Poisson with cluster-robust covariance requested directly in ``fit``.

    The covariance is rescaled through ``scaling_factor`` so that the
    default ``cov_params`` lines up with the Stata reference in ``cls.res2``;
    ``corr_fact`` is therefore set to 1.
    """
    cls.res2 = results_st.results_poisson_clu
    model = smd.Poisson(endog, exog)

    # scaling of cov_params_default to match Stata
    # TODO should the default be changed?
    n_obs, n_params = model.exog.shape
    stata_scale = (n_obs - 1.) / float(n_obs - n_params)

    cluster_kwds = dict(
        groups=group,
        use_correction=True,
        scaling_factor=1. / stata_scale,
        df_correction=True,  # TODO has no effect
    )
    cls.res1 = model.fit(
        disp=False,
        cov_type='cluster',
        cov_kwds=cluster_kwds,
        use_t=False,
    )

    cls.bse_rob = cls.res1.bse

    # backwards compatibility with inherited test methods
    cls.corr_fact = 1
# For every team, fit four intercept-only Poisson models (goals scored and
# conceded, at home and away) and store exp(intercept) for each in ``factors``.
# NOTE(review): FTHG / FTAG are presumably full-time home / away goals --
# confirm against the data source.
for t in Teams:
    homeResults = season.loc[(season['HomeTeam'] == t)]
    awayResults = season.loc[(season['AwayTeam'] == t)]

    # Home matches: goals scored (attack) and goals conceded (defence).
    homeAttack = homeResults['FTHG']
    homeDefence = homeResults['FTAG']
    alpha = list(homeAttack)
    beta = list(homeDefence)

    # Away matches: goals scored (attack) and goals conceded (defence).
    awayAttack = awayResults['FTAG']
    awayDefence = awayResults['FTHG']
    gamma = list(awayAttack)
    delta = list(awayDefence)

    # Sanity checks; the first message previously named Delta although the
    # comparison is between alpha and beta.
    if len(alpha) != len(beta):
        raise Exception('Check Alpha and Beta lengths for ', t)
    if len(gamma) != len(delta):
        raise Exception('Check Gamma and Delta lengths for ', t)

    # Regressing on a column of ones fits a constant-only model.
    resAlpha = sm.Poisson(alpha, np.ones_like(alpha)).fit(disp=0)
    resBeta = sm.Poisson(beta, np.ones_like(beta)).fit(disp=0)
    resGamma = sm.Poisson(gamma, np.ones_like(gamma)).fit(disp=0)
    resDelta = sm.Poisson(delta, np.ones_like(delta)).fit(disp=0)

    allresults = [t,
                  np.exp(resAlpha.params[0]),
                  np.exp(resBeta.params[0]),
                  np.exp(resGamma.params[0]),
                  np.exp(resDelta.params[0])]
    factors.loc[i] = allresults
    i = i+1

factors


# In[3]:

homeResults = season.loc[(season['HomeTeam'] == 'QPR')]
awayResults = season.loc[(season['AwayTeam'] == 'QPR')]
def setup_class(cls):
    """Fit Poisson and delegate the robust covariance to ``get_robust_clu``.

    Sets ``res2`` (reference results) and ``res1`` (fitted results) before
    calling ``cls.get_robust_clu()``.
    """
    cls.res2 = results_st.results_poisson_clu

    fitted = smd.Poisson(endog, exog).fit(disp=False)
    cls.res1 = fitted

    cls.get_robust_clu()
def setup_class(cls):
    """Fit a Poisson-family GLM and delegate to ``get_robust_clu``.

    Sets ``res2`` (reference results) and ``res1`` (fitted GLM results)
    before calling ``cls.get_robust_clu()``.

    A ``smd.Poisson`` model was previously assigned to ``mod`` and then
    immediately overwritten by the GLM; that dead assignment is removed.
    """
    cls.res2 = results_st.results_poisson_clu
    mod = GLM(endog, exog, family=families.Poisson())
    cls.res1 = mod.fit()
    cls.get_robust_clu()
def setup_class(cls):
    """Fit Poisson with exposure and delegate to ``get_robust_clu``.

    Sets ``res2`` (reference results) and ``res1`` (fitted results) before
    calling ``cls.get_robust_clu()``.
    """
    cls.res2 = results_st.results_poisson_exposure_clu  # nonrobust

    fitted = smd.Poisson(endog, exog, exposure=exposure).fit(disp=False)
    cls.res1 = fitted

    cls.get_robust_clu()
# axs[0,1].plot(fpr2, tpr2, marker='.', label='Logistic Regression') # axs[0,1].set_title('Y ~ wh_ratio') # axs[1,0].plot(ns_fpr1, ns_tpr1, linestyle='--', label='No Skill') # axs[1,0].plot(fpr3, tpr3, marker='.', label='Logistic Regression') # axs[1,0].set_title('Y ~ age + gender + BMI + wh_ratio') # axs[1,1].plot(ns_fpr1, ns_tpr1, linestyle='--', label='No Skill') # axs[1,1].plot(fpr4, tpr4, marker='.', label='Logistic Regression') # axs[1,1].set_title('Y ~ age + gender + BMI + wh_ratio + genderxBMI + genderxwh_ratio') # plt.show() # Problem #2 data2 = pd.read_csv('https://donatello-telesca.squarespace.com/s/medpar.csv') data2 = smtools.add_constant(data2) data2['type2xage80'] = data2.type2 * data2.age80 data2['type3xage80'] = data2.type3 * data2.age80 pmod = sm_mod.Poisson(data2.los, data2[['const', 'type2', 'type3']]) pmod_results = pmod.fit() print(pmod_results.summary()) pmod2 = sm_mod.Poisson(data2.los, data2[['const', 'type2', 'type3', 'age80']]) pmod2_results = pmod2.fit() print(pmod2_results.summary()) pmod3 = sm_mod.Poisson( data2.los, data2[['const', 'type2', 'type3', 'age80', 'type2xage80', 'type3xage80']]) pmod3_results = pmod3.fit() print(pmod3_results.summary()) # Generate diagnostic plots for poisson # Fitted vs. pearson residual