def junk():
    """Scratch example exercising constrained Poisson fits; not a test.

    Fits two formula-based Poisson models on the module-level ``data``
    (one with ``pyears`` as exposure, one with ``logpyears`` as a regressor),
    runs a linearly constrained fit on each, and prints the first two
    elements of the second constrained result.
    """
    # Singular Matrix in mod1a.fit()
    import patsy

    # same as Stata default
    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'
    mod = Poisson.from_formula(formula2, data=data,
                               exposure=data['pyears'].values)
    # the fit results were never used; the calls are kept for their effect
    mod.fit()

    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants
    mod.fit_constrained(R, q, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)
    print(mod1a.exog.shape)
    mod1a.fit()

    lc_1a = patsy.DesignInfo(mod1a.exog_names).linear_constraint(
        'C(agecat)[T.4] = C(agecat)[T.5]')
    resc1a = mod1a.fit_constrained(lc_1a.coefs, lc_1a.constants,
                                   fit_kwds={'method': 'newton'})
    print(resc1a[0])
    print(resc1a[1])
def junk():  # FIXME: make this into a test, or move/remove
    """Scratch example exercising constrained Poisson fits; not a test.

    Fits two formula-based Poisson models on the module-level ``data``
    (one with ``pyears`` as exposure, one with ``logpyears`` as a regressor)
    and runs a linearly constrained fit on each. Nothing is returned.
    """
    # Singular Matrix in mod1a.fit()
    import patsy

    # same as Stata default
    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'
    mod = Poisson.from_formula(formula2, data=data,
                               exposure=data['pyears'].values)
    mod.fit()

    # Both sides must name existing interaction terms; the right-hand side
    # previously contained a stray backtick inside ``C(agecat)``.
    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants
    mod.fit_constrained(R, q, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)
    mod1a.fit()

    lc_1a = patsy.DesignInfo(mod1a.exog_names).linear_constraint(
        'C(agecat)[T.4] = C(agecat)[T.5]')
    mod1a.fit_constrained(lc_1a.coefs, lc_1a.constants,
                          fit_kwds={'method': 'newton'})
def junk():
    """Scratch example exercising constrained Poisson fits; not a test.

    Builds a Poisson model with ``pyears`` as exposure and one with
    ``logpyears`` as a regressor from the module-level ``data``, fits each
    with and without a linear constraint, and prints the first two elements
    of the last constrained result.
    """
    # Singular Matrix in mod1a.fit()
    import patsy

    # same as Stata default
    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'
    mod = Poisson.from_formula(formula2, data=data,
                               exposure=data['pyears'].values)
    # results of the plain fits were unused; only the calls are kept
    mod.fit()

    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants
    mod.fit_constrained(R, q, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)
    print(mod1a.exog.shape)
    mod1a.fit()

    design_1a = patsy.DesignInfo(mod1a.exog_names)
    lc_1a = design_1a.linear_constraint('C(agecat)[T.4] = C(agecat)[T.5]')
    resc1a = mod1a.fit_constrained(lc_1a.coefs, lc_1a.constants,
                                   fit_kwds={'method': 'newton'})
    print(resc1a[0])
    print(resc1a[1])
def setup_class(cls):
    """Fit the equality-constrained Poisson model that has no exposure.

    Stores ``results.results_noexposure_constraint`` as ``cls.res2``, the
    result of the ``fit_constrained`` function as ``cls.res1`` and the result
    of the model's own ``fit_constrained`` method as ``cls.res1m``.
    """
    cls.res2 = results.results_noexposure_constraint
    # 2 is dropped baseline for categorical
    cls.idx = [7, 3, 4, 5, 6, 0, 1]

    # example without offset
    mod = Poisson.from_formula('deaths ~ logpyears + smokes + C(agecat)',
                               data=data)

    # get start_params, example fails to converge on one py TravisCI
    start_params = np.zeros(len(mod.exog_names))
    start_params[0] = np.log(mod.endog.mean())

    # if we need it, this is desired params
    desired_params = np.array([-3.93478643, 1.37276214, 2.33077032,
                               2.71338891, 2.71338891, 0.57966535,
                               0.97254074])

    constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
    optim_kwds = {'method': 'bfgs', 'disp': 0}
    cls.res1 = fit_constrained(mod, lc.coefs, lc.constants,
                               start_params=start_params,
                               fit_kwds=optim_kwds)

    # TODO: Newton fails
    # test method of Poisson, not monkey patched
    cls.res1m = mod.fit_constrained(constr, start_params=start_params,
                                    method='bfgs', disp=0)
def setup_class(cls):
    """Fit the Poisson model with a log(pyears) offset and a 0.5 difference
    constraint.

    Stores ``results.results_exposure_constraint2`` as ``cls.res2``, the
    parsed constraint as ``cls.constraints``, the result of the
    ``fit_constrained`` function as ``cls.res1`` and the result of the
    model's own ``fit_constrained`` method as ``cls.res1m``.
    """
    cls.res2 = results.results_exposure_constraint2
    # original note: 2 is dropped baseline for categorical
    # NOTE(review): 2 is present in this list - confirm which index is dropped
    cls.idx = [6, 2, 3, 4, 5, 0]

    # pyears enters the model through offset=log(pyears)
    log_pyears = np.log(data['pyears'].values)
    mod = Poisson.from_formula('deaths ~ smokes + C(agecat)', data=data,
                               offset=log_pyears)

    constr = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
    res1 = fit_constrained(mod, lc.coefs, lc.constants,
                           fit_kwds={'method': 'newton', 'disp': 0})
    cls.res1 = res1
    cls.constraints = lc

    # TODO: bfgs fails
    # test method of Poisson, not monkey patched
    # the first element of res1 is used as start_params for bfgs
    cls.res1m = mod.fit_constrained(constr, method='bfgs', disp=0,
                                    start_params=res1[0])
def test_compare_glm_poisson(self):
    """Check that ``self.res1m`` agrees with a constrained Poisson fit.

    A Poisson model with ``pyears`` as exposure is fitted under the
    constraint ``C(agecat)[T.4] = C(agecat)[T.5]``, started from
    ``self.res1m.params``, and its params, bse and predictions are compared
    with those of ``self.res1m`` (presumably a GLM fit, per the test name).
    """
    res1 = self.res1m

    formula = 'deaths ~ smokes + C(agecat)'
    # exposure is used here instead of offset=np.log(data['pyears'].values)
    mod = Poisson.from_formula(formula, data=data,
                               exposure=data['pyears'].values)

    constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
    res2 = mod.fit_constrained(constr, start_params=self.res1m.params,
                               method='newton', warn_convergence=False,
                               disp=0)

    # we get high precision because we use the params as start_params

    # basic, just as check that we have the same model
    assert_allclose(res1.params, res2.params, rtol=1e-12)
    assert_allclose(res1.bse, res2.bse, rtol=1e-11)

    # check predict, fitted, ...
    predicted = res1.predict()
    assert_allclose(predicted, res2.predict(), rtol=1e-10)
    assert_allclose(res1.mu, predicted, rtol=1e-10)
    assert_allclose(res1.fittedvalues, predicted, rtol=1e-10)
    # compare the linear predictors of the two fits; this previously compared
    # res2 with itself and could never fail
    assert_allclose(res1.predict(linear=True), res2.predict(linear=True),
                    rtol=1e-10)
def test_compare_glm_poisson(self):
    """Check that ``self.res1m`` agrees with a constrained Poisson fit.

    Fits a Poisson model with ``pyears`` as exposure under the constraint
    ``C(agecat)[T.4] = C(agecat)[T.5]``, using ``self.res1m.params`` as
    start values, then compares params, bse and predictions of both results
    (``self.res1m`` is presumably a GLM fit, per the test name).
    """
    res1 = self.res1m

    formula = 'deaths ~ smokes + C(agecat)'
    # exposure is used here instead of offset=np.log(data['pyears'].values)
    mod = Poisson.from_formula(formula, data=data,
                               exposure=data['pyears'].values)

    constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
    res2 = mod.fit_constrained(constr, start_params=self.res1m.params,
                               method='newton', warn_convergence=False,
                               disp=0)

    # we get high precision because we use the params as start_params

    # basic, just as check that we have the same model
    assert_allclose(res1.params, res2.params, rtol=1e-12)
    assert_allclose(res1.bse, res2.bse, rtol=1e-12)

    # check predict, fitted, ...
    predicted = res1.predict()
    assert_allclose(predicted, res2.predict(), rtol=1e-10)
    assert_allclose(res1.mu, predicted, rtol=1e-10)
    assert_allclose(res1.fittedvalues, predicted, rtol=1e-10)
    # compare the linear predictors of the two fits; this previously compared
    # res2 with itself and could never fail
    assert_allclose(res1.predict(linear=True), res2.predict(linear=True),
                    rtol=1e-10)
def setup_class(cls):
    """Fit the Poisson model without exposure under a 0.5 difference
    constraint.

    Stores ``results.results_noexposure_constraint2`` as ``cls.res2``, the
    result of the ``fit_constrained`` function as ``cls.res1`` and the result
    of the model's own ``fit_constrained`` method as ``cls.res1m``.
    """
    cls.res2 = results.results_noexposure_constraint2
    # 2 is dropped baseline for categorical
    cls.idx = [7, 3, 4, 5, 6, 0, 1]

    # example without offset
    mod = Poisson.from_formula('deaths ~ logpyears + smokes + C(agecat)',
                               data=data)

    # get start_params, example fails to converge on one py TravisCI
    start_params = np.zeros(len(mod.exog_names))
    start_params[0] = np.log(mod.endog.mean())

    # if we need it, this is desired params
    desired_params = np.array([-9.43762015, 1.52762442, 2.74155711,
                               3.58730007, 4.08730007, 1.15987869,
                               0.12111539])

    constr = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
    optim_kwds = {'method': 'bfgs', 'disp': 0}
    cls.res1 = fit_constrained(mod, lc.coefs, lc.constants,
                               start_params=start_params,
                               fit_kwds=optim_kwds)

    # TODO: Newton fails
    # test method of Poisson, not monkey patched
    cls.res1m = mod.fit_constrained(constr, start_params=start_params,
                                    method='bfgs', disp=0)
def setup_class(cls):
    """Prepare constrained Poisson fits (no exposure) for the test class.

    ``cls.res2`` is set to ``results.results_noexposure_constraint``;
    ``cls.res1`` holds the output of the ``fit_constrained`` function and
    ``cls.res1m`` the output of ``Poisson.fit_constrained`` for the same
    equality constraint.
    """
    cls.res2 = results.results_noexposure_constraint
    # 2 is dropped baseline for categorical
    cls.idx = [7, 3, 4, 5, 6, 0, 1]

    # example without offset
    formula = 'deaths ~ logpyears + smokes + C(agecat)'
    mod = Poisson.from_formula(formula, data=data)

    # get start_params, example fails to converge on one py TravisCI
    n_params = len(mod.exog_names)
    start_params = np.zeros(n_params)
    start_params[0] = np.log(mod.endog.mean())

    # if we need it, this is desired params
    desired = np.array([-3.93478643, 1.37276214, 2.33077032, 2.71338891,
                        2.71338891, 0.57966535, 0.97254074])

    constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
    design_info = patsy.DesignInfo(mod.exog_names)
    lc = design_info.linear_constraint(constr)
    cls.res1 = fit_constrained(
        mod, lc.coefs, lc.constants,
        start_params=start_params,
        fit_kwds={'method': 'bfgs', 'disp': 0},
    )

    # TODO: Newton fails
    # test method of Poisson, not monkey patched
    cls.res1m = mod.fit_constrained(
        constr, start_params=start_params, method='bfgs', disp=0)
def setup_class(cls):
    """Prepare constrained Poisson fits (no exposure, difference of 0.5).

    ``cls.res2`` is set to ``results.results_noexposure_constraint2``;
    ``cls.res1`` holds the output of the ``fit_constrained`` function and
    ``cls.res1m`` the output of ``Poisson.fit_constrained`` for the same
    constraint.
    """
    cls.res2 = results.results_noexposure_constraint2
    # 2 is dropped baseline for categorical
    cls.idx = [7, 3, 4, 5, 6, 0, 1]

    # example without offset
    formula = 'deaths ~ logpyears + smokes + C(agecat)'
    mod = Poisson.from_formula(formula, data=data)

    # get start_params, example fails to converge on one py TravisCI
    n_params = len(mod.exog_names)
    start_params = np.zeros(n_params)
    start_params[0] = np.log(mod.endog.mean())

    # if we need it, this is desired params
    desired = np.array([-9.43762015, 1.52762442, 2.74155711, 3.58730007,
                        4.08730007, 1.15987869, 0.12111539])

    constr = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
    design_info = patsy.DesignInfo(mod.exog_names)
    lc = design_info.linear_constraint(constr)
    cls.res1 = fit_constrained(
        mod, lc.coefs, lc.constants,
        start_params=start_params,
        fit_kwds={'method': 'bfgs', 'disp': 0},
    )

    # TODO: Newton fails
    # test method of Poisson, not monkey patched
    cls.res1m = mod.fit_constrained(
        constr, start_params=start_params, method='bfgs', disp=0)
def setup_class(cls):
    """Fit the unconstrained Poisson model with a log(pyears) offset.

    Stores ``results.results_exposure_noconstraint`` as ``cls.res2``, the
    raw fit results as ``cls.res1m`` and a ``(params, cov_params)`` tuple as
    ``cls.res1``.
    """
    cls.res2 = results.results_exposure_noconstraint
    # 1 is dropped baseline for categorical
    cls.idx = [6, 2, 3, 4, 5, 0]

    # pyears enters the model through offset=log(pyears)
    log_pyears = np.log(data['pyears'].values)
    mod = Poisson.from_formula('deaths ~ smokes + C(agecat)', data=data,
                               offset=log_pyears)
    fitted = mod.fit(disp=0)._results

    # res1 is duplicate check, so we can follow the same pattern
    cls.res1 = (fitted.params, fitted.cov_params())
    cls.res1m = fitted
def setup_class(cls):
    """Fit a Poisson model on the ``TestAnova3`` data for the term tests.

    Sets ``cls.data`` (the anova data without rows 0, 1 and 2), ``cls.res``
    (fit with ``cov_type='HC0'``), ``cls.term_name`` and ``cls.constraints``.
    """
    import statsmodels.stats.tests.test_anova as ttmod
    from statsmodels.discrete.discrete_model import Poisson

    # reuse the data prepared by the anova test class
    anova_case = ttmod.TestAnova3()
    anova_case.setup_class()
    cls.data = anova_case.data.drop([0, 1, 2])

    formula = "Days ~ C(Duration) + C(Weight)"
    cls.res = Poisson.from_formula(formula, cls.data).fit(cov_type='HC0')

    cls.term_name = "C(Weight)"
    cls.constraints = [
        'C(Weight)[T.2]',
        'C(Weight)[T.3]',
        'C(Weight)[T.3] - C(Weight)[T.2]',
    ]
def setup_class(cls):
    """Prepare a robust-covariance Poisson fit on the ``TestAnova3`` data.

    The first three rows of the anova data are dropped and stored as
    ``cls.data``; the fit (``cov_type='HC0'``) is stored as ``cls.res``,
    together with ``cls.term_name`` and ``cls.constraints``.
    """
    from statsmodels.discrete.discrete_model import Poisson
    import statsmodels.stats.tests.test_anova as ttmod

    # borrow the data set up by the anova test class
    source = ttmod.TestAnova3()
    source.setup_class()
    cls.data = source.data.drop([0, 1, 2])

    model = Poisson.from_formula("Days ~ C(Duration) + C(Weight)", cls.data)
    cls.res = model.fit(cov_type='HC0')

    cls.term_name = "C(Weight)"
    weight_terms = ('C(Weight)[T.2]', 'C(Weight)[T.3]')
    cls.constraints = [
        weight_terms[0],
        weight_terms[1],
        'C(Weight)[T.3] - C(Weight)[T.2]',
    ]
def setup_class(cls):
    """Prepare the unconstrained Poisson fit that uses a log(pyears) offset.

    ``cls.res2`` is set to ``results.results_exposure_noconstraint``;
    ``cls.res1m`` is the underlying results instance of the fit and
    ``cls.res1`` a ``(params, cov_params)`` tuple built from it.
    """
    cls.res2 = results.results_exposure_noconstraint
    # 1 is dropped baseline for categorical
    cls.idx = [6, 2, 3, 4, 5, 0]

    formula = 'deaths ~ smokes + C(agecat)'
    # offset=log(pyears) is used here rather than exposure=pyears
    mod = Poisson.from_formula(
        formula,
        data=data,
        offset=np.log(data['pyears'].values),
    )
    raw_results = mod.fit(disp=0)._results

    # res1 is duplicate check, so we can follow the same pattern
    cls.res1 = (raw_results.params, raw_results.cov_params())
    cls.res1m = raw_results
def setup_class(cls):
    """Fit the equality-constrained Poisson model with a log(pyears) offset.

    Stores ``results.results_exposure_constraint`` as ``cls.res2``, the
    parsed constraint as ``cls.constraints``, the result of the
    ``fit_constrained`` function as ``cls.res1`` and the result of the
    model's own ``fit_constrained`` method as ``cls.res1m``.
    """
    cls.res2 = results.results_exposure_constraint
    # original note: 2 is dropped baseline for categorical
    # NOTE(review): 2 is present in this list - confirm which index is dropped
    cls.idx = [6, 2, 3, 4, 5, 0]

    # pyears enters the model through offset=log(pyears)
    formula = 'deaths ~ smokes + C(agecat)'
    mod = Poisson.from_formula(formula, data=data,
                               offset=np.log(data['pyears'].values))

    constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
    # disp=0 silences the optimizer's convergence printout during setup,
    # matching the other constrained-fit setups
    cls.res1 = fit_constrained(mod, lc.coefs, lc.constants,
                               fit_kwds={'method': 'newton', 'disp': 0})
    cls.constraints = lc

    # TODO: bfgs fails
    # test method of Poisson, not monkey patched
    cls.res1m = mod.fit_constrained(constr, method='newton', disp=0)
def setup_class(cls):
    """Fit the Poisson model with pyears exposure and a 0.5 difference
    constraint.

    Stores ``results.results_exposure_constraint2`` as ``cls.res2``, the
    parsed constraint as ``cls.constraints``, the result of the
    ``fit_constrained`` function as ``cls.res1`` and the result of the
    model's own ``fit_constrained`` method as ``cls.res1m``.
    """
    cls.res2 = results.results_exposure_constraint2
    # original note: 2 is dropped baseline for categorical
    # NOTE(review): 2 is present in this list - confirm which index is dropped
    cls.idx = [6, 2, 3, 4, 5, 0]

    # exposure=pyears is used here rather than offset=log(pyears)
    pyears = data['pyears'].values
    mod = Poisson.from_formula('deaths ~ smokes + C(agecat)', data=data,
                               exposure=pyears)

    constr = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
    res1 = fit_constrained(mod, lc.coefs, lc.constants,
                           fit_kwds={'method': 'newton', 'disp': 0})
    cls.res1 = res1
    cls.constraints = lc

    # TODO: bfgs fails to converge. overflow somewhere?
    # test method of Poisson, not monkey patched
    # the first element of res1 is used as start_params for bfgs
    cls.res1m = mod.fit_constrained(constr, method='bfgs', disp=0,
                                    start_params=res1[0])
def initialize(cls):
    """Fit the Poisson interaction model on ``cls.data``.

    The result, fitted with ``cov_type='HC0'``, is stored as ``cls.res``.
    """
    from statsmodels.discrete.discrete_model import Poisson

    formula = "Days ~ C(Duration, Sum)*C(Weight, Sum)"
    cls.res = Poisson.from_formula(formula, cls.data).fit(cov_type='HC0')
# Example script: fit OLS, GLM and Poisson models on the TestAnova3 data
# (rows 0, 1 and 2 dropped) and print the Wald tests for their terms.
from statsmodels.formula.api import ols, glm, poisson
from statsmodels.discrete.discrete_model import Poisson
import statsmodels.stats.tests.test_anova as ttmod


def _print_wald_terms(title, result, **kwds):
    """Print ``title`` followed by ``result.wald_test_terms(**kwds)``."""
    print(title)
    print(result.wald_test_terms(**kwds))


test = ttmod.TestAnova3()
test.setup_class()
data = test.data.drop([0, 1, 2])

res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
              data).fit(use_t=False)
res_glm = glm("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
              data).fit()
res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
                               data).fit(cov_type='HC0')
res_poi_2 = poisson("Days ~ C(Weight) + C(Duration)",
                    data).fit(cov_type='HC0')

_print_wald_terms('\nOLS', res_ols)
_print_wald_terms('\nGLM', res_glm, skip_single=False,
                  combine_terms=['Duration', 'Weight'])
_print_wald_terms('\nPoisson 1', res_poi, skip_single=False,
                  combine_terms=['Duration', 'Weight'])
_print_wald_terms('\nPoisson 2', res_poi_2, skip_single=False)
# Example script: fit OLS, GLM, Poisson and NegativeBinomial models on the
# TestAnova3 data (rows 0, 1 and 2 dropped) and print the Wald tests for
# their terms.
from statsmodels.formula.api import ols, glm, poisson
from statsmodels.discrete.discrete_model import NegativeBinomial, Poisson
import statsmodels.stats.tests.test_anova as ttmod

test = ttmod.TestAnova3()
# the other callers of TestAnova3 in this file use ``setup_class``;
# ``setupClass`` was the odd one out
test.setup_class()
data = test.data.drop([0, 1, 2])

res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
              data).fit(use_t=False)
res_glm = glm("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
              data).fit()
res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
                               data).fit(cov_type='HC0')
res_poi_2 = poisson("Days ~ C(Weight) + C(Duration)",
                    data).fit(cov_type='HC0')

print('\nOLS')
print(res_ols.wald_test_terms())
print('\nGLM')
print(res_glm.wald_test_terms(skip_single=False,
                              combine_terms=['Duration', 'Weight']))
print('\nPoisson 1')
print(res_poi.wald_test_terms(skip_single=False,
                              combine_terms=['Duration', 'Weight']))
print('\nPoisson 2')
print(res_poi_2.wald_test_terms(skip_single=False))

res_nb2 = NegativeBinomial.from_formula("Days ~ C(Weight) * C(Duration)",
                                        data).fit()
print('\nNegative Binomial nb2')
print(res_nb2.wald_test_terms(skip_single=False))
def initialize(cls):
    """Store a Poisson fit of the interaction model as ``cls.res``.

    The model is built from ``cls.data`` and fitted with ``cov_type='HC0'``.
    """
    from statsmodels.discrete.discrete_model import Poisson

    model = Poisson.from_formula(
        "Days ~ C(Duration, Sum)*C(Weight, Sum)",
        cls.data,
    )
    cls.res = model.fit(cov_type='HC0')