def junk():
    """Scratch example: constrained Poisson fits with and without exposure.

    Original author's note: Singular Matrix in mod1a.fit()

    Uses the module-level names ``Poisson`` and ``data``.  Prints the shape
    of the second model's design matrix and then items 0 and 1 of its
    constrained-fit result.
    """
    import patsy

    # kept from the original scratch code; no fit below uses it
    additive_spec = 'deaths ~ smokes + C(agecat)'

    # same as Stata default
    interaction_spec = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'

    exposure_model = Poisson.from_formula(
        interaction_spec, data=data, exposure=data['pyears'].values)
    unconstrained = exposure_model.fit()

    # translate the textual restriction into the (R, q) pair
    equality = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'
    design = patsy.DesignInfo(exposure_model.exog_names)
    parsed = design.linear_constraint(equality)
    constrained = exposure_model.fit_constrained(
        parsed.coefs, parsed.constants, fit_kwds={'method': 'bfgs'})

    # example without offset
    plain_spec = 'deaths ~ logpyears + smokes + C(agecat)'
    plain_model = Poisson.from_formula(plain_spec, data=data)
    print(plain_model.exog.shape)

    plain_fit = plain_model.fit()
    plain_design = patsy.DesignInfo(plain_model.exog_names)
    age_equality = plain_design.linear_constraint(
        'C(agecat)[T.4] = C(agecat)[T.5]')
    plain_constrained = plain_model.fit_constrained(
        age_equality.coefs, age_equality.constants,
        fit_kwds={'method': 'newton'})
    for position in (0, 1):
        print(plain_constrained[position])
예제 #2
0
def junk():  # FIXME: make this into a test, or move/remove
    """Scratch example: constrained Poisson fits with and without exposure.

    Original author's note: Singular Matrix in mod1a.fit()

    Uses the module-level names ``Poisson`` and ``data``.  All fit results
    are discarded; the function only exercises the calls.
    """
    # same as Stata default
    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'

    mod = Poisson.from_formula(formula2, data=data,
                               exposure=data['pyears'].values)

    mod.fit()

    # Both sides must spell the coefficient names exactly as they appear in
    # mod.exog_names, otherwise patsy cannot parse the constraint.
    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'

    import patsy
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants

    mod.fit_constrained(R, q, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)

    mod1a.fit()
    lc_1a = patsy.DesignInfo(mod1a.exog_names).linear_constraint(
        'C(agecat)[T.4] = C(agecat)[T.5]')
    mod1a.fit_constrained(lc_1a.coefs, lc_1a.constants,
                          fit_kwds={'method': 'newton'})
예제 #3
0
def junk():
    """Scratch run of two constrained Poisson examples.

    Original author's note: Singular Matrix in mod1a.fit()

    Depends on the module-level ``Poisson`` and ``data``.  Output: the
    design-matrix shape of the second model, followed by elements 0 and 1
    of that model's constrained-fit result.
    """
    import patsy

    # left over from the original; not referenced by any fit below
    spec_additive = 'deaths ~ smokes + C(agecat)'

    # same as Stata default
    spec_interaction = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'

    first_model = Poisson.from_formula(spec_interaction,
                                       data=data,
                                       exposure=data['pyears'].values)
    first_fit = first_model.fit()

    restriction_text = (
        'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'
    )
    restriction = patsy.DesignInfo(
        first_model.exog_names).linear_constraint(restriction_text)
    first_constrained = first_model.fit_constrained(
        restriction.coefs,
        restriction.constants,
        fit_kwds={'method': 'bfgs'},
    )

    # example without offset
    second_model = Poisson.from_formula(
        'deaths ~ logpyears + smokes + C(agecat)', data=data)
    print(second_model.exog.shape)

    second_fit = second_model.fit()
    second_restriction = patsy.DesignInfo(
        second_model.exog_names).linear_constraint(
            'C(agecat)[T.4] = C(agecat)[T.5]')
    second_constrained = second_model.fit_constrained(
        second_restriction.coefs,
        second_restriction.constants,
        fit_kwds={'method': 'newton'},
    )
    print(second_constrained[0])
    print(second_constrained[1])
예제 #4
0
    def setup_class(cls):
        """Prepare the equality-constrained Poisson fits (no exposure).

        Sets on ``cls``: ``res2`` (``results.results_noexposure_constraint``),
        ``idx``, ``res1`` (output of the ``fit_constrained`` function) and
        ``res1m`` (output of the model's own ``fit_constrained`` method).
        """
        cls.res2 = results.results_noexposure_constraint
        # 2 is dropped baseline for categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        model = Poisson.from_formula(
            'deaths ~ logpyears + smokes + C(agecat)', data=data)

        # explicit start_params: example fails to converge on one py TravisCI
        initial = np.zeros(len(model.exog_names))
        initial[0] = np.log(model.endog.mean())

        # if we need it, this is desired params
        desired = np.array([-3.93478643,  1.37276214,  2.33077032,  2.71338891,
                            2.71338891, 0.57966535,  0.97254074])

        equality = 'C(agecat)[T.4] = C(agecat)[T.5]'
        parsed = patsy.DesignInfo(model.exog_names).linear_constraint(equality)
        cls.res1 = fit_constrained(
            model, parsed.coefs, parsed.constants,
            start_params=initial,
            fit_kwds={'method': 'bfgs', 'disp': 0})
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(
            equality, start_params=initial, method='bfgs', disp=0)
    def setup_class(cls):
        """Prepare constrained Poisson fits using ``log(pyears)`` as offset.

        Sets on ``cls``: ``res2`` (``results.results_exposure_constraint2``),
        ``idx``, ``res1`` (output of the ``fit_constrained`` function),
        ``constraints`` (the parsed patsy constraint) and ``res1m`` (output
        of the model's ``fit_constrained`` method, started at ``res1[0]``).
        """
        cls.res2 = results.results_exposure_constraint2
        # 2 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]

        # the log of the pyears column enters the model as an offset
        log_pyears = np.log(data['pyears'].values)
        model = Poisson.from_formula('deaths ~ smokes + C(agecat)',
                                     data=data, offset=log_pyears)

        difference = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        parsed = patsy.DesignInfo(model.exog_names).linear_constraint(
            difference)
        newton_quiet = {'method': 'newton', 'disp': 0}
        cls.res1 = fit_constrained(model, parsed.coefs, parsed.constants,
                                   fit_kwds=newton_quiet)
        cls.constraints = parsed
        # TODO: bfgs fails

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(difference, method='bfgs', disp=0,
                                          start_params=cls.res1[0])
    def test_compare_glm_poisson(self):
        """Compare ``self.res1m`` with a constrained fit that uses exposure.

        A Poisson model is rebuilt from the same formula with
        ``exposure=data['pyears'].values`` and refit under the constraint
        ``C(agecat)[T.4] = C(agecat)[T.5]``, starting from
        ``self.res1m.params``.  Parameters, standard errors and predictions
        of the two results must agree.
        """
        res1 = self.res1m

        formula = 'deaths ~ smokes + C(agecat)'
        # exposure= is used here instead of
        # offset=np.log(data['pyears'].values)
        mod = Poisson.from_formula(
            formula,
            data=data,
            exposure=data['pyears'].values)

        constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
        res2 = mod.fit_constrained(constr,
                                   start_params=self.res1m.params,
                                   method='newton',
                                   warn_convergence=False,
                                   disp=0)

        # we get high precision because we use the params as start_params

        # basic, just as check that we have the same model
        assert_allclose(res1.params, res2.params, rtol=1e-12)
        assert_allclose(res1.bse, res2.bse, rtol=1e-11)

        # check predict, fitted, ...

        predicted = res1.predict()
        assert_allclose(predicted, res2.predict(), rtol=1e-10)
        assert_allclose(res1.mu, predicted, rtol=1e-10)
        assert_allclose(res1.fittedvalues, predicted, rtol=1e-10)
        # compare the linear predictor across the two results; comparing
        # res2 with itself would pass no matter what
        assert_allclose(res1.predict(linear=True),
                        res2.predict(linear=True),
                        rtol=1e-10)
예제 #7
0
    def test_compare_glm_poisson(self):
        """Compare ``self.res1m`` with a constrained fit that uses exposure.

        A Poisson model is rebuilt from the same formula with
        ``exposure=data['pyears'].values`` and refit under the constraint
        ``C(agecat)[T.4] = C(agecat)[T.5]``, starting from
        ``self.res1m.params``.  Parameters, standard errors and predictions
        of the two results must agree.
        """
        res1 = self.res1m

        formula = 'deaths ~ smokes + C(agecat)'
        # exposure= is used here instead of
        # offset=np.log(data['pyears'].values)
        mod = Poisson.from_formula(formula, data=data,
                                   exposure=data['pyears'].values)

        constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
        res2 = mod.fit_constrained(constr, start_params=self.res1m.params,
                                   method='newton', warn_convergence=False,
                                   disp=0)

        # we get high precision because we use the params as start_params

        # basic, just as check that we have the same model
        assert_allclose(res1.params, res2.params, rtol=1e-12)
        assert_allclose(res1.bse, res2.bse, rtol=1e-12)

        # check predict, fitted, ...

        predicted = res1.predict()
        assert_allclose(predicted, res2.predict(), rtol=1e-10)
        assert_allclose(res1.mu, predicted, rtol=1e-10)
        assert_allclose(res1.fittedvalues, predicted, rtol=1e-10)
        # compare the linear predictor across the two results; comparing
        # res2 with itself would pass no matter what
        assert_allclose(res1.predict(linear=True), res2.predict(linear=True),
                        rtol=1e-10)
예제 #8
0
    def setup_class(cls):
        """Prepare the difference-constrained Poisson fits (no exposure).

        Sets on ``cls``: ``res2`` (``results.results_noexposure_constraint2``),
        ``idx``, ``res1`` (output of the ``fit_constrained`` function) and
        ``res1m`` (output of the model's own ``fit_constrained`` method).
        """
        cls.res2 = results.results_noexposure_constraint2
        # 2 is dropped baseline for categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        spec = 'deaths ~ logpyears + smokes + C(agecat)'
        model = Poisson.from_formula(spec, data=data)

        # get start_params, example fails to converge on one py TravisCI
        n_params = len(model.exog_names)
        initial = np.zeros(n_params)
        initial[0] = np.log(model.endog.mean())

        # if we need it, this is desired params
        desired = np.array([-9.43762015,  1.52762442,  2.74155711,  3.58730007,
                            4.08730007,  1.15987869,  0.12111539])

        difference = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        design = patsy.DesignInfo(model.exog_names)
        parsed = design.linear_constraint(difference)
        cls.res1 = fit_constrained(
            model,
            parsed.coefs,
            parsed.constants,
            start_params=initial,
            fit_kwds={'method': 'bfgs', 'disp': 0},
        )
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(
            difference,
            start_params=initial,
            method='bfgs',
            disp=0,
        )
예제 #9
0
    def setup_class(cls):
        """Build the fixtures for the equality constraint without exposure.

        After this runs, ``cls`` carries ``res2``
        (``results.results_noexposure_constraint``), ``idx``, ``res1`` (from
        the ``fit_constrained`` function) and ``res1m`` (from the model's
        ``fit_constrained`` method).
        """
        cls.res2 = results.results_noexposure_constraint
        # 2 is dropped baseline for categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        spec = 'deaths ~ logpyears + smokes + C(agecat)'
        poisson_model = Poisson.from_formula(spec, data=data)

        # get start_params, example fails to converge on one py TravisCI
        guess = np.zeros(len(poisson_model.exog_names))
        guess[0] = np.log(poisson_model.endog.mean())

        # if we need it, this is desired params
        target = np.array([-3.93478643,  1.37276214,  2.33077032,  2.71338891,
                           2.71338891, 0.57966535,  0.97254074])

        restriction = 'C(agecat)[T.4] = C(agecat)[T.5]'
        design = patsy.DesignInfo(poisson_model.exog_names)
        linear = design.linear_constraint(restriction)
        cls.res1 = fit_constrained(poisson_model,
                                   linear.coefs,
                                   linear.constants,
                                   start_params=guess,
                                   fit_kwds={'method': 'bfgs', 'disp': 0})
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = poisson_model.fit_constrained(restriction,
                                                  start_params=guess,
                                                  method='bfgs',
                                                  disp=0)
예제 #10
0
    def setup_class(cls):
        """Build the fixtures for the 0.5-difference constraint, no exposure.

        After this runs, ``cls`` carries ``res2``
        (``results.results_noexposure_constraint2``), ``idx``, ``res1`` (from
        the ``fit_constrained`` function) and ``res1m`` (from the model's
        ``fit_constrained`` method).
        """
        cls.res2 = results.results_noexposure_constraint2
        # 2 is dropped baseline for categorical
        cls.idx = [7, 3, 4, 5, 6, 0, 1]

        # example without offset
        poisson_model = Poisson.from_formula(
            'deaths ~ logpyears + smokes + C(agecat)', data=data)

        # get start_params, example fails to converge on one py TravisCI
        guess = np.zeros(len(poisson_model.exog_names))
        guess[0] = np.log(poisson_model.endog.mean())

        # if we need it, this is desired params
        target = np.array([-9.43762015,  1.52762442,  2.74155711,  3.58730007,
                           4.08730007,  1.15987869,  0.12111539])

        restriction = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        linear = patsy.DesignInfo(
            poisson_model.exog_names).linear_constraint(restriction)
        solver_options = {'method': 'bfgs', 'disp': 0}
        cls.res1 = fit_constrained(poisson_model, linear.coefs,
                                   linear.constants, start_params=guess,
                                   fit_kwds=solver_options)
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = poisson_model.fit_constrained(
            restriction, start_params=guess, method='bfgs', disp=0)
예제 #11
0
    def setup_class(cls):
        """Fit the unconstrained Poisson model with ``log(pyears)`` as offset.

        Sets ``cls.res2`` to ``results.results_exposure_noconstraint``,
        ``cls.res1`` to the ``(params, cov_params())`` pair and ``cls.res1m``
        to the ``_results`` attribute of the fit.
        """
        cls.res2 = results.results_exposure_noconstraint
        # 1 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]

        log_pyears = np.log(data['pyears'].values)
        model = Poisson.from_formula('deaths ~ smokes + C(agecat)',
                                     data=data, offset=log_pyears)
        raw = model.fit(disp=0)._results

        # res1 is duplicate check, so we can follow the same pattern
        cls.res1 = (raw.params, raw.cov_params())
        cls.res1m = raw
    def setup_class(cls):
        """Fit a Poisson model on the ``TestAnova3`` data with HC0 covariance.

        Sets ``cls.data`` (the ANOVA data with labels 0, 1 and 2 dropped),
        ``cls.res`` (the fit), ``cls.term_name`` and ``cls.constraints``.
        """
        from statsmodels.discrete.discrete_model import Poisson
        import statsmodels.stats.tests.test_anova as ttmod

        # borrow the data prepared by the ANOVA test class
        anova_case = ttmod.TestAnova3()
        anova_case.setup_class()
        cls.data = anova_case.data.drop([0, 1, 2])

        spec = "Days ~ C(Duration) + C(Weight)"
        model = Poisson.from_formula(spec, cls.data)
        cls.res = model.fit(cov_type='HC0')

        cls.term_name = "C(Weight)"
        cls.constraints = [
            'C(Weight)[T.2]',
            'C(Weight)[T.3]',
            'C(Weight)[T.3] - C(Weight)[T.2]',
        ]
예제 #13
0
    def setup_class(cls):
        """Set up a robust-covariance Poisson fit on the ``TestAnova3`` data.

        Populates ``cls.data`` (ANOVA data minus labels 0, 1, 2), ``cls.res``
        (fit with ``cov_type='HC0'``), ``cls.term_name`` and
        ``cls.constraints``.
        """
        from statsmodels.discrete.discrete_model import Poisson
        import statsmodels.stats.tests.test_anova as ttmod

        # reuse the data set built by the ANOVA test class
        fixture = ttmod.TestAnova3()
        fixture.setup_class()
        trimmed = fixture.data.drop([0, 1, 2])
        cls.data = trimmed

        cls.res = Poisson.from_formula(
            "Days ~ C(Duration) + C(Weight)", cls.data).fit(cov_type='HC0')
        cls.term_name = "C(Weight)"

        weight_2, weight_3 = 'C(Weight)[T.2]', 'C(Weight)[T.3]'
        cls.constraints = [weight_2, weight_3,
                           'C(Weight)[T.3] - C(Weight)[T.2]']
예제 #14
0
    def setup_class(cls):
        """Prepare the unconstrained Poisson fit that uses an offset.

        ``cls.res2`` is ``results.results_exposure_noconstraint``;
        ``cls.res1`` is the tuple ``(params, cov_params())`` and ``cls.res1m``
        the ``_results`` attribute of the fit.
        """
        cls.res2 = results.results_exposure_noconstraint
        # 1 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]

        # offset=log(pyears) is passed here rather than exposure=pyears
        spec = 'deaths ~ smokes + C(agecat)'
        poisson_model = Poisson.from_formula(
            spec,
            data=data,
            offset=np.log(data['pyears'].values),
        )
        fitted = poisson_model.fit(disp=0)
        unwrapped = fitted._results

        # res1 is duplicate check, so we can follow the same pattern
        cls.res1 = (unwrapped.params, unwrapped.cov_params())
        cls.res1m = unwrapped
예제 #15
0
    def setup_class(cls):
        """Prepare equality-constrained Poisson fits with an offset.

        Sets on ``cls``: ``res2`` (``results.results_exposure_constraint``),
        ``idx``, ``res1`` (output of the ``fit_constrained`` function),
        ``constraints`` (the parsed patsy constraint) and ``res1m`` (output
        of the model's ``fit_constrained`` method).
        """
        cls.res2 = results.results_exposure_constraint
        # 2 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]

        # the log of the pyears column enters the model as an offset
        log_pyears = np.log(data['pyears'].values)
        model = Poisson.from_formula('deaths ~ smokes + C(agecat)',
                                     data=data, offset=log_pyears)

        equality = 'C(agecat)[T.4] = C(agecat)[T.5]'
        parsed = patsy.DesignInfo(model.exog_names).linear_constraint(equality)
        cls.res1 = fit_constrained(model, parsed.coefs, parsed.constants,
                                   fit_kwds={'method': 'newton'})
        cls.constraints = parsed
        # TODO: bfgs fails

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(equality, method='newton')
예제 #16
0
    def setup_class(cls):
        """Prepare 0.5-difference constrained Poisson fits with exposure.

        Sets on ``cls``: ``res2`` (``results.results_exposure_constraint2``),
        ``idx``, ``res1`` (output of the ``fit_constrained`` function),
        ``constraints`` (the parsed patsy constraint) and ``res1m`` (output
        of the model's ``fit_constrained`` method, started at ``res1[0]``).
        """
        cls.res2 = results.results_exposure_constraint2
        # 2 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]

        # exposure=pyears is passed here rather than offset=log(pyears)
        pyears_values = data['pyears'].values
        model = Poisson.from_formula('deaths ~ smokes + C(agecat)',
                                     data=data, exposure=pyears_values)

        difference = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        design = patsy.DesignInfo(model.exog_names)
        parsed = design.linear_constraint(difference)
        cls.res1 = fit_constrained(
            model, parsed.coefs, parsed.constants,
            fit_kwds={'method': 'newton', 'disp': 0})
        cls.constraints = parsed
        # TODO: bfgs fails to converge. overflow somewhere?

        # test method of Poisson, not monkey patched
        cls.res1m = model.fit_constrained(
            difference, method='bfgs', disp=0, start_params=cls.res1[0])
예제 #17
0
    def initialize(cls):
        """Fit the Poisson interaction model on ``cls.data`` into ``cls.res``.

        The fit uses ``cov_type='HC0'``.
        """
        from statsmodels.discrete.discrete_model import Poisson

        spec = "Days ~ C(Duration, Sum)*C(Weight, Sum)"
        model = Poisson.from_formula(spec, cls.data)
        cls.res = model.fit(cov_type='HC0')
예제 #18
0
from statsmodels.formula.api import ols, glm, poisson
from statsmodels.discrete.discrete_model import Poisson

import statsmodels.stats.tests.test_anova as ttmod

# Run the ANOVA test-class setup so that its data set is available.
test = ttmod.TestAnova3()
test.setup_class()

# drop the labels 0, 1 and 2 from the test data
data = test.data.drop([0, 1, 2])

# OLS and GLM share the same formula for log(Days + 1)
_log_days_spec = "np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)"
res_ols = ols(_log_days_spec, data).fit(use_t=False)

res_glm = glm(_log_days_spec, data).fit()

# two Poisson fits with HC0 covariance: with and without the interaction
res_poi = Poisson.from_formula(
    "Days ~ C(Weight) * C(Duration)", data).fit(cov_type='HC0')
res_poi_2 = poisson(
    "Days ~ C(Weight) + C(Duration)", data).fit(cov_type='HC0')

# print the Wald test tables, one section per fit
print('\nOLS')
_table = res_ols.wald_test_terms()
print(_table)
print('\nGLM')
_table = res_glm.wald_test_terms(
    skip_single=False, combine_terms=['Duration', 'Weight'])
print(_table)
print('\nPoisson 1')
_table = res_poi.wald_test_terms(
    skip_single=False, combine_terms=['Duration', 'Weight'])
print(_table)
print('\nPoisson 2')
_table = res_poi_2.wald_test_terms(skip_single=False)
print(_table)
예제 #19
0
from statsmodels.formula.api import ols, glm, poisson
from statsmodels.discrete.discrete_model import Poisson

import statsmodels.stats.tests.test_anova as ttmod

# Run the ANOVA test-class setup so that its data set is available.
# The class-level setup hook is spelled ``setup_class`` (pytest xunit style).
test = ttmod.TestAnova3()
test.setup_class()

# drop the labels 0, 1 and 2 from the test data
data = test.data.drop([0, 1, 2])
res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
              data).fit(use_t=False)

res_glm = glm("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
              data).fit()

# two Poisson fits with HC0 covariance: with and without the interaction
res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)",
                               data).fit(cov_type='HC0')
res_poi_2 = poisson("Days ~ C(Weight) + C(Duration)", data).fit(cov_type='HC0')

# print the Wald test tables, one section per fit
print('\nOLS')
print(res_ols.wald_test_terms())
print('\nGLM')
print(res_glm.wald_test_terms(skip_single=False,
                              combine_terms=['Duration', 'Weight']))
print('\nPoisson 1')
print(res_poi.wald_test_terms(skip_single=False,
                              combine_terms=['Duration', 'Weight']))
print('\nPoisson 2')
print(res_poi_2.wald_test_terms(skip_single=False))

# same interaction formula, fitted with a negative binomial model
from statsmodels.discrete.discrete_model import NegativeBinomial
res_nb2 = NegativeBinomial.from_formula("Days ~ C(Weight) * C(Duration)",
                                        data).fit()
print('\nNegative Binomial nb2')
print(res_nb2.wald_test_terms(skip_single=False))
    def initialize(cls):
        """Store an HC0-covariance Poisson fit of ``cls.data`` on ``cls.res``.

        The model formula is ``Days ~ C(Duration, Sum)*C(Weight, Sum)``.
        """
        from statsmodels.discrete.discrete_model import Poisson

        cls.res = Poisson.from_formula(
            "Days ~ C(Duration, Sum)*C(Weight, Sum)",
            cls.data,
        ).fit(cov_type='HC0')