def setup_class(cls):
        """Fit a Poisson model with cluster-robust covariance requested in ``fit``.

        The covariance is rescaled through ``scaling_factor`` so that the
        default ``cov_params`` match the Stata reference results, and
        ``normalized_cov_params`` is cleared afterwards to check that the
        results still work without it (see #2209).
        """
        cls.res2 = results_st.results_poisson_clu
        model = smd.Poisson(endog, exog)

        # scaling of cov_params_default to match Stata
        # TODO should the default be changed?
        n_obs, n_params = model.exog.shape
        stata_scale = (n_obs - 1.) / float(n_obs - n_params)

        cluster_options = dict(
            groups=group,
            use_correction=True,
            scaling_factor=1. / stata_scale,
            df_correction=True,  # TODO has no effect
        )
        cls.res1 = model.fit(
            disp=False,
            cov_type='cluster',
            cov_kwds=cluster_options,
            use_t=False,
        )

        # The attribute cannot be set on the wrapper res1, so it is set on
        # the wrapped results instance res1._results instead.
        cls.res1._results.normalized_cov_params = None

        cls.bse_rob = cls.res1.bse

        # backwards compatibility with inherited test methods
        cls.corr_fact = 1
    def setup_class(cls):
        """Fit a Poisson model, then switch it to cluster-robust covariance.

        The model is first fit with the default covariance.
        ``get_robustcov_results`` is then called on the wrapped results with
        ``use_self=True``; its return value is ignored, so the robust
        covariance is presumably attached to ``cls.res1`` in place.
        The correction factor used by the comparison tests comes from
        ``cls.get_correction_factor``.
        """
        cls.res2 = results_st.results_poisson_clu
        mod = smd.Poisson(endog, exog)
        cls.res1 = mod.fit(disp=False)

        from statsmodels.base.covtype import get_robustcov_results

        get_robustcov_results(
            cls.res1._results,
            'cluster',
            groups=group,
            use_correction=True,
            df_correction=True,  # TODO has no effect
            use_t=False,
            use_self=True)
        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1)
    def setup_class(cls):
        """Fit a Poisson model, then switch it to cluster-robust covariance.

        The model is first fit with the default covariance.
        ``get_robustcov_results`` is then called on the wrapped results with
        ``use_self=True``; its return value is ignored, so the robust
        covariance is presumably attached to ``cls.res1`` in place.
        ``cls.corr_fact`` is the square root of
        ``(nobs - 1) / (nobs - k_params)``, i.e. the factor on the scale of
        standard errors.
        """
        cls.res2 = results_st.results_poisson_clu
        mod = smd.Poisson(endog, exog)
        cls.res1 = mod.fit(disp=False)

        from statsmodels.base.covtype import get_robustcov_results

        get_robustcov_results(
            cls.res1._results,
            'cluster',
            groups=group,
            use_correction=True,
            df_correction=True,  # TODO has no effect
            use_t=False,
            use_self=True)
        cls.bse_rob = cls.res1.bse

        nobs = cls.res1.model.exog.shape[0]
        k_params = len(cls.res1.params)
        corr_fact = (nobs - 1.) / float(nobs - k_params)
        # for bse we need sqrt of correction factor
        cls.corr_fact = np.sqrt(corr_fact)
Beispiel #4
0
 def setup_class(cls):
     """Fit an unweighted Poisson GLM on the cpunish data.

     ``cls.res1`` holds the GLM fit; ``cls.res2`` holds the reference
     results ``res_stata.results_poisson_none_nonrobust``.
     """
     cls.res1 = GLM(cpunish_data.endog,
                    cpunish_data.exog,
                    family=sm.families.Poisson()).fit()
     cls.res2 = res_stata.results_poisson_none_nonrobust
    def setup_class(cls):
        """Fit a Poisson model with exposure and HC1 covariance set in ``fit``.

        Reference results are ``results_st.results_poisson_exposure_hc1``;
        the correction factor is obtained from
        ``cls.get_correction_factor`` with ``sub_kparams=False``.
        """
        cls.res2 = results_st.results_poisson_exposure_hc1

        model = smd.Poisson(endog, exog, exposure=exposure)
        fitted = model.fit(disp=False, cov_type='HC1')
        cls.res1 = fitted
        cls.bse_rob = fitted.bse

        cls.corr_fact = cls.get_correction_factor(fitted, sub_kparams=False)
    def setup_class(cls):
        """Fit a Poisson GLM on the cpunish data using frequency weights.

        ``cls.res1`` holds the weighted GLM fit; ``cls.res2`` holds the
        reference results ``res_stata.results_poisson_fweight_nonrobust``.
        """
        fweights = np.array(
            [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])

        cls.res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                       family=sm.families.Poisson(),
                       freq_weights=fweights).fit()
        cls.res2 = res_stata.results_poisson_fweight_nonrobust
    def setup_class(cls):
        """Fit a Poisson model with exposure and HC1 covariance set in ``fit``.

        ``cls.corr_fact`` is computed inline as the square root of the
        inverse of ``nobs / (nobs - 1)``.
        """
        cls.res2 = results_st.results_poisson_exposure_hc1

        model = smd.Poisson(endog, exog, exposure=exposure)
        cls.res1 = model.fit(disp=False, cov_type='HC1')
        cls.bse_rob = cls.res1.bse

        n_obs, _ = model.exog.shape
        hc1_ratio = n_obs / float(n_obs - 1.)
        # for bse we need sqrt of correction factor
        cls.corr_fact = np.sqrt(1. / hc1_ratio)
    def setup_class(cls):
        """Fit a Poisson model, then switch it to HC1 covariance.

        ``get_robustcov_results`` is called on the wrapped results with
        ``use_self=True``; its return value is ignored, so the robust
        covariance is presumably attached to ``cls.res1`` in place.
        The correction factor is obtained from
        ``cls.get_correction_factor`` with ``sub_kparams=False``.
        """
        from statsmodels.base.covtype import get_robustcov_results

        cls.res2 = results_st.results_poisson_hc1
        cls.res1 = smd.Poisson(endog, exog).fit(disp=False)

        get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1,
                                                  sub_kparams=False)
    def setup_class(cls):
        """Fit GLM-Poisson and discrete Poisson with HC0 on simulated counts.

        Counts are drawn with ``np.random.poisson(endog)`` after seeding the
        global NumPy random state. ``cls.res1`` is the GLM fit and
        ``cls.res2`` is the discrete Poisson fit; ``rtol`` is stored on
        ``cls.res1``.
        """
        np.random.seed(987125643)  # not intentional seed
        endog_count = np.random.poisson(endog)
        cls.cov_type = 'HC0'

        glm_model = GLM(endog_count, exog, family=families.Poisson())
        cls.res1 = glm_model.fit(cov_type='HC0')

        discrete_model = smd.Poisson(endog_count, exog)
        cls.res2 = discrete_model.fit(cov_type='HC0')

        cls.res1.rtol = 1e-11
    def setup_class(cls):
        """Fit a Poisson model, then switch it to HC1 covariance.

        ``get_robustcov_results`` is called on the wrapped results with
        ``use_self=True``; its return value is ignored, so the robust
        covariance is presumably attached to ``cls.res1`` in place.
        ``cls.corr_fact`` is computed inline as the square root of the
        inverse of ``nobs / (nobs - 1)``.
        """
        from statsmodels.base.covtype import get_robustcov_results

        cls.res2 = results_st.results_poisson_hc1
        model = smd.Poisson(endog, exog)
        cls.res1 = model.fit(disp=False)

        get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
        cls.bse_rob = cls.res1.bse

        n_obs, _ = model.exog.shape
        hc1_ratio = n_obs / float(n_obs - 1.)
        # for bse we need sqrt of correction factor
        cls.corr_fact = np.sqrt(1. / hc1_ratio)
    def setupClass(cls):
        """Fit a Poisson GLM on the cpunish data with normalized weights.

        The frequency weights are rescaled so that they sum to the number of
        observations and are then passed as ``freq_weights``. ``cls.res2``
        holds the reference results
        ``res_stata.results_poisson_aweight_nonrobust``.
        """
        fweights = np.array(
            [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
        # faking aweights by using normalized freq_weights
        wsum = fweights.sum()
        nobs = len(cpunish_data.endog)
        aweights = fweights / wsum * nobs

        cls.res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                       family=sm.families.Poisson(),
                       freq_weights=aweights).fit()
        cls.res2 = res_stata.results_poisson_aweight_nonrobust
Beispiel #12
0
    def __init__(self):
        """Set up the Poisson-family GLM test with the canonical log link.

        Reference results (``Cpunish``) were obtained by R. Column 3 of the
        cpunish ``exog`` is log-transformed and a constant is appended as
        the last column before fitting. A discrete Poisson model is also
        fit, started from 0.9 times the GLM parameters, for comparison.
        """
        from .results.results_glm import Cpunish
        from statsmodels.datasets.cpunish import load

        dataset = load()
        self.data = dataset
        dataset.exog[:, 3] = np.log(dataset.exog[:, 3])
        dataset.exog = add_constant(dataset.exog, prepend=False)

        glm_model = GLM(dataset.endog, dataset.exog,
                        family=sm.families.Poisson())
        self.res1 = glm_model.fit()
        self.res2 = Cpunish()

        # compare with discrete, start close to save time
        near_optimum = self.res1.params * 0.9
        discrete_model = discrete.Poisson(dataset.endog, dataset.exog)
        self.resd = discrete_model.fit(start_params=near_optimum, disp=False)
    def setup_class(cls):
        """Fit a Poisson model with exposure, then switch to cluster-robust.

        ``get_robustcov_results`` is called on the wrapped results with
        ``use_self=True``; its return value is ignored, so the robust
        covariance is presumably attached to ``cls.res1`` in place.
        The correction factor comes from ``cls.get_correction_factor``.
        """
        from statsmodels.base.covtype import get_robustcov_results

        cls.res2 = results_st.results_poisson_exposure_clu
        model = smd.Poisson(endog, exog, exposure=exposure)
        cls.res1 = model.fit(disp=False)

        robust_options = dict(
            groups=group,
            use_correction=True,
            df_correction=True,  # TODO has no effect
            use_t=False,
            use_self=True,
        )
        get_robustcov_results(cls.res1._results, 'cluster', **robust_options)
        cls.bse_rob = cls.res1.bse

        cls.corr_fact = cls.get_correction_factor(cls.res1)
Beispiel #14
0
def fit_data(data, explained, explanatory, poisson=False):
    """Fit a response variable (e.g. intensity) to a list of explanatory variables

    The fitting is run twice, restricting to the significant explanatory
    variables in the second run.

    Parameters
    ----------
    data : DataFrame { year, `explained`, `explanatory`, ... }
        An intercept column is added automatically. The input frame itself
        is not modified.
    explained : str
        Name of explained variable, e.g. 'intensity'.
    explanatory : list of str
        Names of explanatory variables, e.g. ['gmt','esoi'].
    poisson : boolean
        Optionally, use Poisson regression for fitting.
        If False (default), uses ordinary least squares (OLS) regression.

    Returns
    -------
    sm_results : list of two statsmodels Results objects
        Results for first and second run, both fit with HC1 covariance.
    """
    d_explained = data[explained]
    d_explanatory = data[explanatory]

    # for the first run, assume that all variables are significant
    significant = explanatory
    sm_results = []
    for _ in range(2):
        # Restrict to variables with a significant relationship and add the
        # intercept column. ``assign`` builds a new frame, so the constant is
        # never written into a frame obtained by column selection (which
        # triggers pandas' SettingWithCopyWarning).
        d_explanatory = d_explanatory[significant].assign(const=1.0)

        if poisson:
            mod = smd.Poisson(d_explained, d_explanatory)
            res = mod.fit(maxiter=100, disp=0, cov_type='HC1')
        else:
            mod = sm.OLS(d_explained, d_explanatory)
            res = mod.fit(maxiter=100, disp=0, cov_type='HC1', use_t=True)
        # fit_significant is defined elsewhere; its result selects the
        # columns kept for the second run.
        significant = fit_significant(res)
        sm_results.append(res)

    return sm_results
    def setup_class(cls):
        """Fit a Poisson GLM on the cpunish data using ``var_weights``.

        The frequency weights are rescaled so that they sum to the number of
        observations and are passed as ``var_weights``. The reference
        results are copied before columns 3 and 4 of ``resids`` are
        multiplied by the square root of the weights, so the shared
        reference object is not changed in place.
        """
        fweights = np.array(
            [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
        # faking aweights by using normalized freq_weights
        wsum = fweights.sum()
        nobs = len(cpunish_data.endog)
        aweights = fweights / wsum * nobs

        cls.res1 = GLM(cpunish_data.endog, cpunish_data.exog,
                       family=sm.families.Poisson(),
                       var_weights=aweights).fit()

        # Need to copy to avoid inplace adjustment
        from copy import copy
        cls.res2 = copy(res_stata.results_poisson_aweight_nonrobust)
        cls.res2.resids = cls.res2.resids.copy()

        # Need to adjust resids for pearson and deviance to add weights
        cls.res2.resids[:, 3:5] *= np.sqrt(aweights[:, np.newaxis])
Beispiel #16
0
    def setup_class(cls):
        """Fit a Poisson model with cluster-robust covariance set in ``fit``.

        The covariance is rescaled through ``scaling_factor`` so that the
        default ``cov_params`` match the Stata reference results.
        """
        cls.res2 = results_st.results_poisson_clu
        model = smd.Poisson(endog, exog)

        # scaling of cov_params_default to match Stata
        # TODO should the default be changed?
        n_obs, n_params = model.exog.shape
        stata_scale = (n_obs - 1.) / float(n_obs - n_params)

        cluster_options = {
            'groups': group,
            'use_correction': True,
            'scaling_factor': 1. / stata_scale,
            'df_correction': True,  # TODO has no effect
        }
        cls.res1 = model.fit(disp=False, cov_type='cluster',
                             cov_kwds=cluster_options, use_t=False)

        cls.bse_rob = cls.res1.bse

        # backwards compatibility with inherited test methods
        cls.corr_fact = 1
Beispiel #17
0
# For every team, fit four intercept-only Poisson models (a column of ones as
# the only regressor) to the goal columns of its home and away matches and
# store exp(intercept) of each fit as one row of `factors`.
# `Teams`, `season`, `factors` and the row counter `i` are defined earlier.
for t in Teams:
    homeResults = season.loc[(season['HomeTeam'] == t)]
    awayResults = season.loc[(season['AwayTeam'] == t)]
    # presumably FTHG/FTAG are full-time home/away goals -- confirm
    homeAttack = homeResults['FTHG']
    homeDefence = homeResults['FTAG']
    alpha = list(homeAttack)
    beta = list(homeDefence)
    awayAttack = awayResults['FTAG']
    awayDefence = awayResults['FTHG']
    gamma = list(awayAttack)
    delta = list(awayDefence)
    if len(alpha) != len(beta):
        # the message now names the two series that are actually compared
        raise Exception('Check Alpha and Beta lengths for ', t)
    if len(gamma) != len(delta):
        raise Exception('Check Gamma and Delta lengths for ', t)
    resAlpha = sm.Poisson(alpha,np.ones_like(alpha)).fit(disp=0)
    resBeta = sm.Poisson(beta,np.ones_like(beta)).fit(disp=0)
    resGamma = sm.Poisson(gamma,np.ones_like(gamma)).fit(disp=0)
    resDelta = sm.Poisson(delta,np.ones_like(delta)).fit(disp=0)

    allresults = [t, np.exp(resAlpha.params[0]), np.exp(resBeta.params[0]), np.exp(resGamma.params[0]), np.exp(resDelta.params[0])]
    factors.loc[i] = allresults
    i = i+1

factors


# In[3]:

homeResults = season.loc[(season['HomeTeam'] == 'QPR')]
awayResults = season.loc[(season['AwayTeam'] == 'QPR')]
 def setup_class(cls):
     """Fit a Poisson model, then let ``cls.get_robust_clu`` finish the setup.

     ``get_robust_clu`` is defined elsewhere on the class.
     """
     cls.res2 = results_st.results_poisson_clu
     cls.res1 = smd.Poisson(endog, exog).fit(disp=False)
     cls.get_robust_clu()
 def setup_class(cls):
     """Fit a Poisson-family GLM, then let ``cls.get_robust_clu`` finish.

     ``get_robust_clu`` is defined elsewhere on the class. The reference
     results are ``results_st.results_poisson_clu``.
     """
     cls.res2 = results_st.results_poisson_clu
     mod = GLM(endog, exog, family=families.Poisson())
     cls.res1 = mod.fit()
     cls.get_robust_clu()
 def setup_class(cls):
     """Fit a Poisson model with exposure, then call ``cls.get_robust_clu``.

     ``get_robust_clu`` is defined elsewhere on the class.
     """
     cls.res2 = results_st.results_poisson_exposure_clu
     cls.res1 = smd.Poisson(endog, exog, exposure=exposure).fit(disp=False)
     cls.get_robust_clu()
Beispiel #21
0
# axs[0,1].plot(fpr2, tpr2, marker='.', label='Logistic Regression')
# axs[0,1].set_title('Y ~ wh_ratio')
# axs[1,0].plot(ns_fpr1, ns_tpr1, linestyle='--', label='No Skill')
# axs[1,0].plot(fpr3, tpr3, marker='.', label='Logistic Regression')
# axs[1,0].set_title('Y ~ age + gender + BMI + wh_ratio')
# axs[1,1].plot(ns_fpr1, ns_tpr1, linestyle='--', label='No Skill')
# axs[1,1].plot(fpr4, tpr4, marker='.', label='Logistic Regression')
# axs[1,1].set_title('Y ~ age + gender + BMI + wh_ratio + genderxBMI + genderxwh_ratio')
# plt.show()

# Problem #2
# Load the medpar data, add a constant column and two interaction columns,
# then fit three nested Poisson models for `los` and print each summary.
data2 = smtools.add_constant(
    pd.read_csv('https://donatello-telesca.squarespace.com/s/medpar.csv'))
data2['type2xage80'] = data2['type2'] * data2['age80']
data2['type3xage80'] = data2['type3'] * data2['age80']

# Model 1: admission-type indicators only
pmod = sm_mod.Poisson(data2['los'], data2[['const', 'type2', 'type3']])
pmod_results = pmod.fit()
print(pmod_results.summary())

# Model 2: adds age80
pmod2 = sm_mod.Poisson(
    data2['los'],
    data2[['const', 'type2', 'type3', 'age80']])
pmod2_results = pmod2.fit()
print(pmod2_results.summary())

# Model 3: adds the type x age80 interactions
pmod3 = sm_mod.Poisson(
    data2['los'],
    data2[['const', 'type2', 'type3', 'age80',
           'type2xage80', 'type3xage80']])
pmod3_results = pmod3.fit()
print(pmod3_results.summary())

# Generate diagnostic plots for poisson
# Fitted vs. pearson residual