Beispiel #1
0
    def setup_class(cls):
        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs,6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog, prepend=False)
        xbeta = 1 + 0.1*rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        cls.res_glm = mod_glm.fit()

        #estimate generic MLE
        #cls.mod = PoissonGMLE(data_endog, data_exog)
        #res = cls.mod.fit()

        #create offset variable based on first exog
        cls.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)
        offset = cls.res_discrete.params[0] * data_exog[:,0]  #1d ???

        #estimate discretemod.Poisson as benchmark, now has offset
        cls.res_discrete = Poisson(data_endog, data_exog[:,1:],
                                    offset=offset).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        cls.res_glm = mod_glm.fit()

        #cls.res = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset).fit(start_params = np.ones(6)/2., method='nm')
        modo = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset)
        cls.res = modo.fit(start_params = 0.9*cls.res_discrete.params,
                            method='bfgs', disp=0)
Beispiel #2
0
    def __init__(self):

        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs, 6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog, prepend=False)
        xbeta = 1 + 0.1 * rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        #self.mod = PoissonGMLE(data_endog, data_exog)
        #res = self.mod.fit()

        #create offset variable based on first exog
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)
        offset = self.res_discrete.params[0] * data_exog[:, 0]  #1d ???

        #estimate discretemod.Poisson as benchmark, now has offset
        self.res_discrete = Poisson(data_endog,
                                    data_exog[:, 1:],
                                    offset=offset).fit(disp=0)

        # Note : ZI has one extra parameter
        self.res = PoissonZiGMLE(
            data_endog, data_exog[:, 1:], offset=offset).fit(
                start_params=np.r_[0.9 * self.res_discrete.params, 10],
                method='bfgs',
                disp=0)

        self.decimal = 4
Beispiel #3
0
def junk():  # FIXME: make this into a test, or move/remove
    # Singular Matrix in mod1a.fit()

    # same as Stata default
    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'

    mod = Poisson.from_formula(formula2, data=data,
                               exposure=data['pyears'].values)

    mod.fit()

    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agec`at)[4]'

    import patsy
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants

    mod.fit_constrained(R, q, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)

    mod1a.fit()
    lc_1a = patsy.DesignInfo(mod1a.exog_names).linear_constraint(
        'C(agecat)[T.4] = C(agecat)[T.5]')
    mod1a.fit_constrained(lc_1a.coefs, lc_1a.constants,
                          fit_kwds={'method': 'newton'})
Beispiel #4
0
 def __init__(self,
              endog,
              exog,
              exog_infl=None,
              offset=None,
              exposure=None,
              inflation='logit',
              missing='none',
              **kwargs):
     super(ZeroInflatedPoisson, self).__init__(endog,
                                               exog,
                                               offset=offset,
                                               inflation=inflation,
                                               exog_infl=exog_infl,
                                               exposure=exposure,
                                               missing=missing,
                                               **kwargs)
     self.model_main = Poisson(self.endog,
                               self.exog,
                               offset=offset,
                               exposure=exposure)
     self.distribution = zipoisson
     self.result_class = ZeroInflatedPoissonResults
     self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
     self.result_class_reg = L1ZeroInflatedPoissonResults
     self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper
Beispiel #5
0
def junk():
    # Singular Matrix in mod1a.fit()

    formula1 = 'deaths ~ smokes + C(agecat)'

    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'  # same as Stata default

    mod = Poisson.from_formula(formula2, data=data, exposure=data['pyears'].values)

    res0 = mod.fit()

    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'

    import patsy
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants

    resc = mod.fit_constrained(R,q, fit_kwds={'method':'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)
    print(mod1a.exog.shape)

    res1a = mod1a.fit()
    lc_1a = patsy.DesignInfo(mod1a.exog_names).linear_constraint('C(agecat)[T.4] = C(agecat)[T.5]')
    resc1a = mod1a.fit_constrained(lc_1a.coefs, lc_1a.constants, fit_kwds={'method':'newton'})
    print(resc1a[0])
    print(resc1a[1])
    def setup_class(cls):

        expected_params = [1, 1, 0.5]
        np.random.seed(987123)
        nobs = 500
        exog = np.ones((nobs, 2))
        exog[:nobs // 2, 1] = 0
        # offset is used to create misspecification of the model
        # for predicted probabilities conditional moment test
        #offset = 0.5 * np.random.randn(nobs)
        #range_mix = 0.5
        #offset = -range_mix / 2 + range_mix * np.random.rand(nobs)
        offset = 0
        mu_true = np.exp(exog.dot(expected_params[:-1]) + offset)

        endog_poi = np.random.poisson(mu_true / 5)
        # endog3 = distr.zigenpoisson.rvs(mu_true, 0,
        #                                2, 0.01, size=mu_true.shape)

        model_poi = Poisson(endog_poi, exog)
        res_poi = model_poi.fit(method='bfgs', maxiter=5000, maxfun=5000)
        cls.exog = exog
        cls.endog = endog_poi
        cls.res = res_poi
        cls.nobs = nobs
    def poisson_regression(self, endog, exog, clean_data="greedy"):

        s = self.map_column_to_sheet(endog)

        arg_endog = endog
        arg_exog = exog

        # prepare data
        v = np.copy(exog)
        v = np.append(v, endog)
        dfClean = s.cleanData(v, clean_data)
        exog = sm.add_constant(dfClean[exog])
        endog = dfClean[endog]

        poisson = Poisson(endog, exog)
        fit = poisson.fit()

        utterance = (
            "Here are the results of a Poisson regression with endogenous variables "
        )
        utterance = (
            utterance
            + str(arg_endog)
            + " and exogenous variables "
            + str(arg_exog)
            + ".\n"
        )
        utterance = utterance + str(fit.summary())

        return QueryResult(fit.summary(), utterance)
def junk():
    # Singular Matrix in mod1a.fit()

    formula1 = 'deaths ~ smokes + C(agecat)'

    formula2 = 'deaths ~ C(agecat) + C(smokes) : C(agecat)'  # same as Stata default

    mod = Poisson.from_formula(formula2,
                               data=data,
                               exposure=data['pyears'].values)

    res0 = mod.fit()

    constraints = 'C(smokes)[T.1]:C(agecat)[3] = C(smokes)[T.1]:C(agecat)[4]'

    import patsy
    lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constraints)
    R, q = lc.coefs, lc.constants

    resc = mod.fit_constrained(R, q, fit_kwds={'method': 'bfgs'})

    # example without offset
    formula1a = 'deaths ~ logpyears + smokes + C(agecat)'
    mod1a = Poisson.from_formula(formula1a, data=data)
    print(mod1a.exog.shape)

    res1a = mod1a.fit()
    lc_1a = patsy.DesignInfo(
        mod1a.exog_names).linear_constraint('C(agecat)[T.4] = C(agecat)[T.5]')
    resc1a = mod1a.fit_constrained(lc_1a.coefs,
                                   lc_1a.constants,
                                   fit_kwds={'method': 'newton'})
    print(resc1a[0])
    print(resc1a[1])
Beispiel #9
0
def test_netchop_improvement(key):
    res = Poisson(
        ddf[key].values,
        add_constant(ddf.method_simultaneous)
    ).fit()
    print(res.summary())
    return res
    def setup_class(cls):

        expected_params = [1, 1, 0.5]
        np.random.seed(987123)
        nobs = 500
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 0
        # offset is used to create misspecification of the model
        # for predicted probabilities conditional moment test
        #offset = 0.5 * np.random.randn(nobs)
        #range_mix = 0.5
        #offset = -range_mix / 2 + range_mix * np.random.rand(nobs)
        offset = 0
        mu_true = np.exp(exog.dot(expected_params[:-1]) + offset)

        endog_poi = np.random.poisson(mu_true / 5)
        # endog3 = distr.zigenpoisson.rvs(mu_true, 0,
        #                                2, 0.01, size=mu_true.shape)

        model_poi = Poisson(endog_poi, exog)
        res_poi = model_poi.fit(method='bfgs', maxiter=5000, maxfun=5000)
        cls.exog = exog
        cls.endog = endog_poi
        cls.res = res_poi
        cls.nobs = nobs
Beispiel #11
0
    def _initialize(cls):
        y, x = cls.y, cls.x

        modp = Poisson(y, x)
        cls.res2 = modp.fit(disp=0)

        mod = PoissonPenalized(y, x)
        mod.pen_weight = 0
        cls.res1 = mod.fit(method='bfgs', maxiter=100, disp=0)

        cls.atol = 5e-6
Beispiel #12
0
    def _initialize(cls):
        y, x = cls.y, cls.x

        modp = Poisson(y, x)
        cls.res2 = modp.fit(disp=0)

        mod = PoissonPenalized(y, x)
        mod.pen_weight = 0
        cls.res1 = mod.fit(method='bfgs', maxiter=100, disp=0)

        cls.atol = 5e-6
Beispiel #13
0
    def setup_class(cls):
        # here we don't need to check convergence from default start_params
        start_params = [14.1709, 0.7085, -3.4548, -0.539, 3.2368,  -7.9299,
                        -5.0529]
        mod_poi = Poisson(endog, exog)
        res_poi = mod_poi.fit(start_params=start_params)
        marge_poi = res_poi.get_margeff(dummy=True)
        cls.res = res_poi
        cls.margeff = marge_poi

        cls.res1_slice = [0, 1, 2, 3, 5, 6]
        cls.res1 = res_stata.results_poisson_margins_dummy
Beispiel #14
0
    def setup_class(cls):
        df = data_bin
        mod = GLM(df['constrict'], df[['const', 'log_rate', 'log_volumne']],
                  family=families.Poisson())
        res = mod.fit(attach_wls=True, atol=1e-10)
        from statsmodels.discrete.discrete_model import Poisson
        mod2 = Poisson(df['constrict'],
                       df[['const', 'log_rate', 'log_volumne']])
        res2 = mod2.fit(tol=1e-10)

        cls.infl0 = res.get_influence()
        cls.infl1 = res2.get_influence()
Beispiel #15
0
    def _initialize(cls):
        y, x = cls.y, cls.x
        modp = Poisson(y, x[:, :cls.k_nonzero])
        cls.res2 = modp.fit(disp=0)

        mod = PoissonPenalized(y, x, penal=cls.penalty)
        mod.pen_weight *= 1.5
        mod.penal.tau = 0.05
        cls.res1 = mod.fit(method='bfgs', maxiter=100, disp=0)

        cls.exog_index = slice(None, cls.k_nonzero, None)

        cls.atol = 5e-3
Beispiel #16
0
    def _initialize(cls):
        y, x = cls.y, cls.x
        modp = Poisson(y, x[:, :cls.k_nonzero])
        cls.res2 = modp.fit(disp=0)

        mod = PoissonPenalized(y, x, penal=cls.penalty)
        mod.pen_weight *= 1.5
        mod.penal.tau = 0.05
        cls.res1 = mod.fit(method='bfgs', maxiter=100, disp=0)

        cls.exog_index = slice(None, cls.k_nonzero, None)

        cls.atol = 5e-3
Beispiel #17
0
    def setup_class(cls):
        # here we don't need to check convergence from default start_params
        start_params = [
            14.1709, 0.7085, -3.4548, -0.539, 3.2368, -7.9299, -5.0529
        ]
        mod_poi = Poisson(endog, exog)
        res_poi = mod_poi.fit(start_params=start_params)
        marge_poi = res_poi.get_margeff(dummy=True)
        cls.res = res_poi
        cls.margeff = marge_poi

        cls.res1_slice = [0, 1, 2, 3, 5, 6]
        cls.res1 = res_stata.results_poisson_margins_dummy
Beispiel #18
0
    def setup_class(cls):
        # here we don't need to check convergence from default start_params
        start_params = [14.1709, 0.7085, -3.4548, -0.539, 3.2368,  -7.9299,
                        -5.0529]
        mod_poi = Poisson(endog, exog)
        res_poi = mod_poi.fit(start_params=start_params)
        #res_poi = mod_poi.fit(maxiter=100)
        marge_poi = res_poi.get_margeff()
        cls.res = res_poi
        cls.margeff = marge_poi

        cls.rtol_fac = 1
        cls.res1_slice = slice(None, None, None)
        cls.res1 = res_stata.results_poisson_margins_cont
Beispiel #19
0
    def setup_class(cls):
        # here we don't need to check convergence from default start_params
        start_params = [
            14.1709, 0.7085, -3.4548, -0.539, 3.2368, -7.9299, -5.0529
        ]
        mod_poi = Poisson(endog, exog)
        res_poi = mod_poi.fit(start_params=start_params)
        #res_poi = mod_poi.fit(maxiter=100)
        marge_poi = res_poi.get_margeff()
        cls.res = res_poi
        cls.margeff = marge_poi

        cls.rtol_fac = 1
        cls.res1_slice = slice(None, None, None)
        cls.res1 = res_stata.results_poisson_margins_cont
    def setup_class(cls):

        cls.res2 = results.results_exposure_constraint2
        #cls.idx = [3, 4, 5, 6, 0, 1]  # 2 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]  # 2 is dropped baseline for categorical

        # example without offset
        formula = 'deaths ~ smokes + C(agecat)'
        mod = Poisson.from_formula(formula,
                                   data=data,
                                   offset=np.log(data['pyears'].values))

        constr = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
        cls.res1 = fit_constrained(mod,
                                   lc.coefs,
                                   lc.constants,
                                   fit_kwds={
                                       'method': 'newton',
                                       'disp': 0
                                   })
        cls.constraints = lc
        # TODO: bfgs fails

        # test method of Poisson, not monkey patched
        cls.res1m = mod.fit_constrained(constr,
                                        method='bfgs',
                                        disp=0,
                                        start_params=cls.res1[0])
    def setup_class(cls):

        cls.res2 = results.results_noexposure_constraint2
        cls.idx = [7, 3, 4, 5, 6, 0, 1]  # 2 is dropped baseline for categorical

        # example without offset
        formula = 'deaths ~ logpyears + smokes + C(agecat)'
        mod = Poisson.from_formula(formula, data=data)

        # get start_params, example fails to converge on one py TravisCI
        k_vars = len(mod.exog_names)
        start_params = np.zeros(k_vars)
        start_params[0] = np.log(mod.endog.mean())
        # if we need it, this is desired params
        p = np.array([-9.43762015,  1.52762442,  2.74155711,  3.58730007,
                      4.08730007,  1.15987869,  0.12111539])

        constr = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
        cls.res1 = fit_constrained(mod, lc.coefs, lc.constants,
                                   start_params=start_params,
                                   fit_kwds={'method': 'bfgs', 'disp': 0})
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = mod.fit_constrained(constr, start_params=start_params,
                                        method='bfgs', disp=0)
Beispiel #22
0
 def setupClass(cls):
     data = sm.datasets.randhie.load()
     exog = sm.add_constant(data.exog, prepend=False)
     cls.res1 = Poisson(data.endog, exog).fit(method='newton', disp=0)
     res2 = RandHIE()
     res2.poisson()
     cls.res2 = res2
    def setup_class(cls):

        cls.res2 = results.results_noexposure_constraint
        cls.idx = [7, 3, 4, 5, 6, 0, 1]  # 2 is dropped baseline for categorical

        # example without offset
        formula = 'deaths ~ logpyears + smokes + C(agecat)'
        mod = Poisson.from_formula(formula, data=data)
        #res1a = mod1a.fit()
        # get start_params, example fails to converge on one py TravisCI
        k_vars = len(mod.exog_names)
        start_params = np.zeros(k_vars)
        start_params[0] = np.log(mod.endog.mean())
        # if we need it, this is desired params
        p = np.array([-3.93478643,  1.37276214,  2.33077032,  2.71338891,
                      2.71338891, 0.57966535,  0.97254074])

        constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
        lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
        cls.res1 = fit_constrained(mod, lc.coefs, lc.constants,
                                        start_params=start_params,
                                        fit_kwds={'method': 'bfgs',
                                                  'disp': 0})
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = mod.fit_constrained(constr, start_params=start_params,
                                        method='bfgs', disp=0)
Beispiel #24
0
    def __init__(self):

        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs,6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog, prepend=False)
        xbeta = 1 + 0.1*rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        #estimate discretemod.Poisson as benchmark
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        #self.mod = PoissonGMLE(data_endog, data_exog)
        #res = self.mod.fit()
        offset = self.res_discrete.params[0] * data_exog[:,0]  #1d ???
        #self.res = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset).fit(start_params = np.ones(6)/2., method='nm')
        modo = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset)
        self.res = modo.fit(start_params = 0.9*self.res_discrete.params[1:],
                            method='nm', disp=0)
Beispiel #25
0
    def test_compare_glm_poisson(self):
        res1 = self.res1m
        res2 = self.res2

        formula = 'deaths ~ smokes + C(agecat)'
        mod = Poisson.from_formula(formula, data=data,
                                   exposure=data['pyears'].values)
                                   #offset=np.log(data['pyears'].values))

        constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
        res2 = mod.fit_constrained(constr, start_params=self.res1m.params,
                                   method='newton', warn_convergence=False,
                                   disp=0)

        # we get high precision because we use the params as start_params

        # basic, just as check that we have the same model
        assert_allclose(res1.params, res2.params, rtol=1e-12)
        assert_allclose(res1.bse, res2.bse, rtol=1e-12)

        # check predict, fitted, ...

        predicted = res1.predict()
        assert_allclose(predicted, res2.predict(), rtol=1e-10)
        assert_allclose(res1.mu, predicted, rtol=1e-10)
        assert_allclose(res1.fittedvalues, predicted, rtol=1e-10)
        assert_allclose(res2.predict(linear=True), res2.predict(linear=True),
                        rtol=1e-10)
Beispiel #26
0
    def setup_class(cls):

        cls.res2 = results.results_noexposure_constraint2
        cls.idx = [7, 3, 4, 5, 6, 0, 1]  # 2 is dropped baseline for categorical

        # example without offset
        formula = 'deaths ~ logpyears + smokes + C(agecat)'
        mod = Poisson.from_formula(formula, data=data)

        # get start_params, example fails to converge on one py TravisCI
        k_vars = len(mod.exog_names)
        start_params = np.zeros(k_vars)
        start_params[0] = np.log(mod.endog.mean())
        # if we need it, this is desired params
        p = np.array([-9.43762015,  1.52762442,  2.74155711,  3.58730007,
                      4.08730007,  1.15987869,  0.12111539])

        constr = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
        cls.res1 = fit_constrained(mod, lc.coefs, lc.constants,
                                   start_params=start_params,
                                   fit_kwds={'method': 'bfgs', 'disp': 0})
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = mod.fit_constrained(constr, start_params=start_params,
                                        method='bfgs', disp=0)
Beispiel #27
0
    def setup_class(cls):

        cls.res2 = results.results_noexposure_constraint
        cls.idx = [7, 3, 4, 5, 6, 0, 1]  # 2 is dropped baseline for categorical

        # example without offset
        formula = 'deaths ~ logpyears + smokes + C(agecat)'
        mod = Poisson.from_formula(formula, data=data)
        #res1a = mod1a.fit()
        # get start_params, example fails to converge on one py TravisCI
        k_vars = len(mod.exog_names)
        start_params = np.zeros(k_vars)
        start_params[0] = np.log(mod.endog.mean())
        # if we need it, this is desired params
        p = np.array([-3.93478643,  1.37276214,  2.33077032,  2.71338891,
                      2.71338891, 0.57966535,  0.97254074])

        constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
        lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
        cls.res1 = fit_constrained(mod, lc.coefs, lc.constants,
                                        start_params=start_params,
                                        fit_kwds={'method': 'bfgs',
                                                  'disp': 0})
        # TODO: Newton fails

        # test method of Poisson, not monkey patched
        cls.res1m = mod.fit_constrained(constr, start_params=start_params,
                                        method='bfgs', disp=0)
    def test_compare_glm_poisson(self):
        res1 = self.res1m
        res2 = self.res2

        formula = 'deaths ~ smokes + C(agecat)'
        mod = Poisson.from_formula(
            formula,
            data=data,
            #offset=np.log(data['pyears'].values))
            exposure=data['pyears'].values)

        constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
        res2 = mod.fit_constrained(constr,
                                   start_params=self.res1m.params,
                                   method='newton',
                                   warn_convergence=False,
                                   disp=0)

        # we get high precision because we use the params as start_params

        # basic, just as check that we have the same model
        assert_allclose(res1.params, res2.params, rtol=1e-12)
        assert_allclose(res1.bse, res2.bse, rtol=1e-11)

        # check predict, fitted, ...

        predicted = res1.predict()
        assert_allclose(predicted, res2.predict(), rtol=1e-10)
        assert_allclose(res1.mu, predicted, rtol=1e-10)
        assert_allclose(res1.fittedvalues, predicted, rtol=1e-10)
        assert_allclose(res2.predict(linear=True),
                        res2.predict(linear=True),
                        rtol=1e-10)
Beispiel #29
0
 def setupClass(cls):
     from results.results_discrete import RandHIE
     data = sm.datasets.randhie.load()
     exog = sm.add_constant(data.exog)
     cls.res1 = Poisson(data.endog, exog).fit(method='newton', disp=0)
     res2 = RandHIE()
     res2.poisson()
     cls.res2 = res2
Beispiel #30
0
 def testSimulate(self):
     np.random.seed(123)
     beta0 = np.r_[1.1, 2.2, 3.3, 4.4]
     y, X = poisson.simulate(100, beta0)
     self.assertEqual(X.shape, (100, 4))
     self.assertEqual(y.shape, (100, ))
     # try to recover params using frequentist regression
     ml_fit = Poisson(y, X).fit()
     self.assertLess(np.linalg.norm(beta0 - ml_fit.params, 2), 2.0)
    def setup_class(cls):
        # copy-paste except for model
        nobs, k_vars = 500, 5

        np.random.seed(786452)
        x = np.random.randn(nobs, k_vars)
        x[:, 0] = 1
        x2 = np.random.randn(nobs, 2)
        xx = np.column_stack((x, x2))

        if cls.dispersed:
            het = np.random.randn(nobs)
            y = np.random.poisson(np.exp(x.sum(1) * 0.5 + het))
            #y_mc = np.random.negative_binomial(np.exp(x.sum(1) * 0.5), 2)
        else:
            y = np.random.poisson(np.exp(x.sum(1) * 0.5))

        cls.exog_extra = x2
        cls.model_full = Poisson(y, xx)
        cls.model_drop = Poisson(y, x)
Beispiel #32
0
    def setup_class(cls):

        cls.res2 = results.results_exposure_noconstraint
        cls.idx = [6, 2, 3, 4, 5, 0]  # 1 is dropped baseline for categorical

        # example without offset
        formula = 'deaths ~ smokes + C(agecat)'
        mod = Poisson.from_formula(formula, data=data,
                                   offset=np.log(data['pyears'].values))
        res1 = mod.fit(disp=0)._results
        # res1 is duplicate check, so we can follow the same pattern
        cls.res1 = (res1.params, res1.cov_params())
        cls.res1m = res1
Beispiel #33
0
    def setup_class(cls):

        cls.res2 = results.results_exposure_noconstraint
        cls.idx = [6, 2, 3, 4, 5, 0] # 1 is dropped baseline for categorical

        # example without offset
        formula = 'deaths ~ smokes + C(agecat)'
        mod = Poisson.from_formula(formula, data=data,
                                   #exposure=data['pyears'].values)
                                   offset=np.log(data['pyears'].values))
        res1 = mod.fit(disp=0)._results
        # res1 is duplicate check, so we can follow the same pattern
        cls.res1 = (res1.params, res1.cov_params())
        cls.res1m = res1
Beispiel #34
0
    def fit(self, rs: RecordSet) -> None:
        """
		fit a Probit regression mdl

		:param rs: The record set to fit with.
		"""
        # set params
        self.data = cp.deepcopy(rs)
        patterns = self.data.entries[:, :-1]
        out = self.data.entries[:, -1:]

        if self.add_intercept:
            intercept = np.ones((patterns.shape[0], 1))
            patterns = np.hstack((intercept, patterns))

        # avoid error
        if self.alpha == 0:
            raise Exception("Alpha Probit too low to obtain reliable results")

        self.model = Poisson(endog=out.ravel(), exog=patterns)
        self.model = self.model.fit_regularized(alpha=self.alpha,
                                                maxiter=10e8,
                                                disp=False)
    def setup_class(cls):
        from statsmodels.discrete.discrete_model import Poisson
        import statsmodels.stats.tests.test_anova as ttmod

        test = ttmod.TestAnova3()
        test.setup_class()
        cls.data = test.data.drop([0,1,2])

        mod = Poisson.from_formula("Days ~ C(Duration) + C(Weight)", cls.data)
        cls.res = mod.fit(cov_type='HC0')
        cls.term_name = "C(Weight)"
        cls.constraints = ['C(Weight)[T.2]',
                           'C(Weight)[T.3]',
                           'C(Weight)[T.3] - C(Weight)[T.2]']
Beispiel #36
0
 def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
              inflation='logit', missing='none', **kwargs):
     super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                               inflation=inflation,
                                               exog_infl=exog_infl,
                                               exposure=exposure,
                                               missing=missing, **kwargs)
     self.model_main = Poisson(self.endog, self.exog, offset=offset,
                               exposure=exposure)
     self.distribution = zipoisson
     self.result_class = ZeroInflatedPoissonResults
     self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
     self.result_class_reg = L1ZeroInflatedPoissonResults
     self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper
Beispiel #37
0
    def setup_class(cls):
        from statsmodels.discrete.discrete_model import Poisson
        import statsmodels.stats.tests.test_anova as ttmod

        test = ttmod.TestAnova3()
        test.setup_class()
        cls.data = test.data.drop([0,1,2])

        mod = Poisson.from_formula("Days ~ C(Duration) + C(Weight)", cls.data)
        cls.res = mod.fit(cov_type='HC0')
        cls.term_name = "C(Weight)"
        cls.constraints = ['C(Weight)[T.2]',
                           'C(Weight)[T.3]',
                           'C(Weight)[T.3] - C(Weight)[T.2]']
    def setup_class(cls):

        cls.res2 = results.results_exposure_constraint
        #cls.idx = [3, 4, 5, 6, 0, 1]  # 2 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]  # 2 is dropped baseline for categorical

        # example without offset
        formula = 'deaths ~ smokes + C(agecat)'
        mod = Poisson.from_formula(formula, data=data,
                                   offset=np.log(data['pyears'].values))
        #res1a = mod1a.fit()
        constr = 'C(agecat)[T.4] = C(agecat)[T.5]'
        lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
        cls.res1 = fit_constrained(mod, lc.coefs, lc.constants,
                                   fit_kwds={'method':'newton'})
        cls.constraints = lc
        # TODO: bfgs fails

        # test method of Poisson, not monkey patched
        cls.res1m = mod.fit_constrained(constr, method='newton')
Beispiel #39
0
    def __init__(self):

        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs,6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog, prepend=False)
        xbeta = 0.1 + 0.1*rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        #estimate discretemod.Poisson as benchmark
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        self.mod = PoissonGMLE(data_endog, data_exog)
        self.res = self.mod.fit(start_params=0.9 * self.res_discrete.params,
                                method='nm', disp=0)
    def setup_class(cls):

        cls.res2 = results.results_exposure_constraint2
        #cls.idx = [3, 4, 5, 6, 0, 1]  # 2 is dropped baseline for categorical
        cls.idx = [6, 2, 3, 4, 5, 0]  # 2 is dropped baseline for categorical

        # example without offset
        formula = 'deaths ~ smokes + C(agecat)'
        mod = Poisson.from_formula(formula, data=data,
                                   #offset=np.log(data['pyears'].values))
                                   exposure=data['pyears'].values)
        #res1a = mod1a.fit()
        constr = 'C(agecat)[T.5] - C(agecat)[T.4] = 0.5'
        lc = patsy.DesignInfo(mod.exog_names).linear_constraint(constr)
        cls.res1 = fit_constrained(mod, lc.coefs, lc.constants,
                                   fit_kwds={'method': 'newton',
                                             'disp': 0})
        cls.constraints = lc
        # TODO: bfgs fails to converge. overflow somewhere?

        # test method of Poisson, not monkey patched
        cls.res1m = mod.fit_constrained(constr, method='bfgs', disp=0,
                                        start_params=cls.res1[0])
    def test_spec_tests(self):
        # regression test, numbers similar to Monte Carlo simulation
        res_dispersion = np.array([[0.1396096387543, 0.8889684245877],
                                   [0.1396096387543, 0.8889684245877],
                                   [0.2977840351238, 0.7658680002106],
                                   [0.1307899995877, 0.8959414342111],
                                   [0.1307899995877, 0.8959414342111],
                                   [0.1357101381056, 0.8920504328246],
                                   [0.2776587511235, 0.7812743277372]])

        res_zi = np.array([
            [00.1389582826821, 0.7093188241734],
            [-0.3727710861669, 0.7093188241734],
            [-0.2496729648642, 0.8028402670888],
            [00.0601651553909, 0.8062350958880],
        ])

        respoi = Poisson(self.endog, self.exog).fit(disp=0)
        dia = PoissonDiagnostic(respoi)
        t_disp = dia.test_dispersion()[0]
        assert_allclose(t_disp, res_dispersion, rtol=1e-8)

        nobs = self.endog.shape[0]
        t_zi_jh = dia.test_poisson_zeroinflation(method="broek",
                                                 exog_infl=np.ones(nobs))
        t_zib = dia.test_poisson_zeroinflation(method="broek")
        t_zim = dia.test_poisson_zeroinflation(method="prob")
        t_zichi2 = dia.test_chisquare_prob(bin_edges=np.arange(3))

        t_zi = np.vstack([t_zi_jh[:2], t_zib[:2], t_zim[:2], t_zichi2[:2]])
        assert_allclose(t_zi, res_zi, rtol=1e-8)

        # test jansakul and hinde with exog_infl
        t_zi_ex = dia.test_poisson_zeroinflation(method="broek",
                                                 exog_infl=self.exog)
        res_zi_ex = np.array([3.7813218150779, 0.1509719973257])
        assert_allclose(t_zi_ex[:2], res_zi_ex, rtol=1e-8)
Beispiel #42
0
def test_poisson_screening():

    np.random.seed(987865)

    y, x, idx_nonzero_true, beta = _get_poisson_data()
    nobs = len(y)

    xnames_true = ['var%4d' % ii for ii in idx_nonzero_true]
    xnames_true[0] = 'const'
    parameters = pd.DataFrame(beta[idx_nonzero_true],
                              index=xnames_true,
                              columns=['true'])

    xframe_true = pd.DataFrame(x[:, idx_nonzero_true], columns=xnames_true)
    res_oracle = Poisson(y, xframe_true).fit()
    parameters['oracle'] = res_oracle.params

    mod_initial = PoissonPenalized(y, np.ones(nobs), pen_weight=nobs * 5)

    screener = VariableScreening(mod_initial)
    exog_candidates = x[:, 1:]
    res_screen = screener.screen_exog(exog_candidates, maxiter=10)

    assert_equal(np.sort(res_screen.idx_nonzero), idx_nonzero_true)

    xnames = ['var%4d' % ii for ii in res_screen.idx_nonzero]
    xnames[0] = 'const'

    # smoke test
    res_screen.results_final.summary(xname=xnames)
    res_screen.results_pen.summary()
    assert_equal(res_screen.results_final.mle_retvals['converged'], True)

    ps = pd.Series(res_screen.results_final.params, index=xnames, name='final')
    parameters = parameters.join(ps, how='outer')

    assert_allclose(parameters['oracle'], parameters['final'], atol=5e-6)
Beispiel #43
0
    def initialize(cls):
        from statsmodels.discrete.discrete_model import Poisson

        mod = Poisson.from_formula("Days ~ C(Duration, Sum)*C(Weight, Sum)",
                                   cls.data)
        cls.res = mod.fit(cov_type='HC0')
Beispiel #44
0
class PredictPlayerStats(ConvertMixin):

    def __init__(self, engine, player_name, stat_to_predict, opposing_team_name,
                 predictor_stats=('csum_min_kills', 'csum_min_minions_killed'),
                 defense_predictor_stats=('csum_prev_min_allowed_kills', 'csum_prev_min_allowed_assists'),
                 game_range=None):
        self.engine = engine
        self.player_name = player_name
        self.stat_to_predict = stat_to_predict
        if predictor_stats:
            self.predictor_stats = ('csum_prev_min_kills', 'csum_prev_min_minions_killed')
        else:
            self.predictor_stats = ('csum_prev_min_kills', 'csum_prev_min_minions_killed')
        role_stats = ('Jungler', 'Mid', 'Coach', 'Support', 'AD', 'Sub', 'Top')
        self.predictor_stats = self.predictor_stats + defense_predictor_stats + role_stats
        self.opposing_team_name = opposing_team_name
        self.player_stats_table_name = 'player_stats_df'
        self.processed_player_stars_table_name = 'processed_player_stats_df'
        self.key_stats = ('kills', 'deaths', 'assists', 'minions_killed', 'gold',
                          'k_a', 'a_over_k')
        self.game_range = game_range
        self._process_player_stats_and_train()

    def _process_player_stats_and_train(self):
        processed_player_stats_df = self._get_processed_player_stats_in_df()
        self.latest_predictor_numpy_array = self._get_latest_player_stats_numpy_array(processed_player_stats_df)
        print('latest predictors numpy array {}'.format(self.latest_predictor_numpy_array))
        predictors, y_array = self._get_predictors_in_numpy_arrays(processed_player_stats_df)
        self._train_model(predictors, y_array)

    def _get_latest_player_stats_numpy_array(self, processed_player_stats_df):
        player_id = self._get_player_id_by_player_name(self.player_name)
        player_stats_df = processed_player_stats_df[processed_player_stats_df['player_id'] == player_id]
        latest_player_stats_df = player_stats_df.sort(['game_id'], ascending=False).head(1)
        dict_player = latest_player_stats_df.to_dict('records')[0]
        player_predictor_stats = []
        for predictor_stat in self.predictor_stats:
            # print('processing predictor stat {}'.format(predictor_stat))
            player_predictor_stats.append(dict_player[predictor_stat])
        latest_predictor_numpy_array = numpy.array([player_predictor_stats])
        return latest_predictor_numpy_array

    def _get_predictors_in_numpy_arrays(self, processed_player_stats_df):
        player_game_records = self._get_predictors(processed_player_stats_df)
        game_list = []
        y_array_list = []
        for player_game_record in player_game_records:
            game_predictor_stats = []
            if not (numpy.isnan(player_game_record['csum_prev_min_kills'])
                    or numpy.isnan(player_game_record['csum_prev_min_allowed_kills'])):
                if player_game_record['csum_prev_min_assists'] != 0:
                    prev_predictor_stats = self._convert_predictors_to_prev_csum(self.predictor_stats)
                    for prev_predictor_stat in prev_predictor_stats:
                        game_predictor_stats.append(player_game_record[prev_predictor_stat])
                    game_list.append(game_predictor_stats)
                    y_array_list.append(player_game_record['y_element'])
        predictors = numpy.array(game_list)
        y_array = numpy.array([y_array_list])
        return predictors, y_array

    def _get_predictors(self, processed_player_stats_df):
        player_game_records = processed_player_stats_df.to_dict('records')
        player_game_records.sort(key=itemgetter('game_id'))
        for player_game_record in player_game_records:
            player_game_record['y_element'] = player_game_record[self.stat_to_predict]
        return player_game_records

    def _train_model(self, predictors, y_array):
        y_1darray = numpy.squeeze(y_array)
        self.poisson = Poisson(y_1darray, predictors)
        self.pos_result = self.poisson.fit(method='bfgs')

    def _get_game_ids_from_database(self):
        game_ids_row = Game.objects.values_list('id', flat=True)
        game_ids = [game for game in game_ids_row]
        return game_ids

    def _get_lastest_processed_team_stats_by_name(self):
        return ProcessedTeamStatsDf.objects.filter(name=self.opposing_team_name).order_by('-id').first()

    def _get_game_by_ids(self, game_ids):
        return Game.objects.filter(id__in=game_ids)

    def _get_player_id_by_player_name(self, player_name):
        player = Player.objects.filter(name=player_name)
        return player[0].id

    def _get_processed_player_stats_in_df(self):
        game_ids = self._get_game_ids_from_database()
        last_game_number = game_ids[-1]
        has_processed_team_stats_table = self.engine.has_table(self.processed_player_stars_table_name)
        if has_processed_team_stats_table:
            df_game_stats = pandas.read_sql(self.player_stats_table_name, self.engine)
            df_game_stats_all = df_game_stats[df_game_stats.game_id.isin(game_ids)]
            # Using game_numbers here since we need the last few games to check.
            max_game_id_cached = df_game_stats_all['game_id'].max()
            max_index_cached = df_game_stats_all['index'].max()
            if pandas.isnull(max_game_id_cached):
                max_game_id_cached = game_ids[0]
            # Check if all the game numbers have been cached,
            # if not return what game to start form and what game to end from.
            if max_game_id_cached != last_game_number:
                # Get the index of the max_game_id
                max_game_id_index = game_ids.index(max_game_id_cached)
                # Trim down the list to only the games that need to be retrieved,
                # start from the max_id + 1 because we don't
                # want to count max_id we already have it
                game_ids_to_find = game_ids[max_game_id_index:]
                games = self._get_game_by_ids(game_ids_to_find)
                player_stats_df = self._get_player_stats_in_df(games, max_index_cached)
                self._insert_into_player_stats_df_tables(player_stats_df)
            else:
                # If everything was cached return cached as true and just return the last numbers
                # I could do this part better.
                print("everything cached no need to retrieve from api")
        else:
            _get_player_stats_in_df = 0
            # Table did not exist, have to get all
            games = self._get_game_by_ids(game_ids)
            player_stats_df = self._get_player_stats_in_df(games, _get_player_stats_in_df)
            print('table does not exist inserting full table')
            self._insert_into_player_stats_df_tables(player_stats_df)
            print('table inserted')
        if self.game_range == '5':
            processed_player_stats_df = pandas.read_sql('select * from processed_player_stats_df_limit_5',
                                                              con=self.engine)
        elif self.game_range == '10':
            processed_player_stats_df = pandas.read_sql('select * from processed_player_stats_df_limit_10',
                                                              con=self.engine)
        else:
            processed_player_stats_df = pandas.read_sql_table(self.processed_player_stars_table_name, self.engine)
        return processed_player_stats_df

    def _process_player_stats_df(self, player_stats_df):
        player_stats_df = player_stats_df.sort(['game_id', 'player_id'])
        key_stats = ['game_length_minutes'] + (list(self.key_stats))
        player_stats_df['clean_kills'] = player_stats_df['kills']
        player_stats_df.ix[player_stats_df.clean_kills == 0, 'clean_kills'] = 1
        player_stats_df['k_a'] = \
            player_stats_df['kills'] + player_stats_df['assists']
        player_stats_df['a_over_k'] = \
            player_stats_df['assists'] / player_stats_df['clean_kills']
        player_stats_for_pivot = player_stats_df[['player_name', 'role']]
        player_stats_for_pivot['value'] = 1
        player_pivot_df = player_stats_for_pivot.pivot_table(index='player_name', columns='role', values='value')
        player_pivot_df.fillna(0, inplace=True)
        player_pivot_df.reset_index(inplace=True)
        player_stats_df = pandas.merge(player_stats_df, player_pivot_df, on='player_name')
        for key_stat in key_stats:
            print('doing key stats {}'.format(key_stat))
            player_stats_df['csum_{}'.format(key_stat)] = player_stats_df.groupby(by='player_id')[key_stat].cumsum()
            player_stats_df['csum_prev_{}'.format(key_stat)] = \
                player_stats_df['csum_{}'.format(key_stat)] - player_stats_df[key_stat]
            # player_stats_df['csum_prev_avg_{}'.format(key_stat)] = \
            #     player_stats_df['csum_prev_{}'.format(key_stat)] / player_stats_df['csum_prev_game_number']
            player_stats_df['per_min_{}'.format(key_stat)] = player_stats_df[key_stat] / \
                                                             player_stats_df['game_length_minutes']
            if key_stat not in ['game_number', 'game_length_minutes']:
                print('doing stats not game_number {}'.format(key_stat))
                player_stats_df['csum_min_{}'.format(key_stat)] = \
                    player_stats_df['csum_{}'.format(key_stat)] / player_stats_df['csum_game_length_minutes']
                player_stats_df['csum_prev_min_{}'.format(key_stat)] = \
                    player_stats_df['csum_prev_{}'.format(key_stat)] / player_stats_df['csum_prev_game_length_minutes']
                player_stats_df['csum_prev_min_{}'.format(key_stat)].fillna(0, inplace=True)
            player_stats_df = player_stats_df.sort(['game_id'])
        return player_stats_df

    def _get_player_stats_in_df(self, games, max_index_cached):
        player_stats_df = None
        for game in games:
            players_stats = self._convert_game_to_player_stats_df(game)
            if player_stats_df is None:
                player_stats_df = pandas.DataFrame(players_stats, index=list(range(max_index_cached, (max_index_cached + 10))))
            else:
                single_game_player_stats_df = pandas.DataFrame(players_stats, index=list(range(max_index_cached, (max_index_cached + 10))))
                player_stats_df = player_stats_df.append(single_game_player_stats_df)
            max_index_cached += 10
        return player_stats_df

    def _convert_game_to_player_stats_df(self, game):
        players_stats = game.playerstats_set.all()
        players_stats_dict = game.playerstats_set.all().values()
        player_stats_list = []
        for player_stats, player_stats_dict in zip(players_stats, players_stats_dict):
            player_stats_dict['game_length_minutes'] = float(game.game_length_minutes)
            player_stats_dict['gold'] = float(player_stats_dict['gold'])
            player_stats_dict['player_name'] = player_stats.player.name
            self._populate_player_stats_with_defense_stats(player_stats_dict, player_stats, game)
            player_stats_list.append(player_stats_dict)
        return player_stats_list

    def _populate_player_stats_with_defense_stats(self, player_stats_dict, player_stats, game):
        current_team = player_stats.team
        processed_team_stats_dict = game.processedteamstatsdf_set.exclude(team_name=current_team).values()[0]
        for key_stat in self.key_stats:
            player_stats_dict['csum_prev_min_allowed_{}'.format(key_stat)] = \
                processed_team_stats_dict['csum_prev_min_allowed_{}'.format(key_stat)]
            player_stats_dict['csum_min_allowed_{}'.format(key_stat)] = \
                processed_team_stats_dict['csum_min_allowed_{}'.format(key_stat)]

    def _insert_into_player_stats_df_tables(self, player_stats_df):
        player_stats_df.to_sql(self.player_stats_table_name, self.engine, if_exists='append')
        # Could be optimized kinda a hack
        player_stats_df = pandas.read_sql("select ps.*, p.role, p.image from player_stats_df ps, player p "
                                          "where ps.player_id = p.id", self.engine)
        processed_team_stats_df = self._process_player_stats_df(player_stats_df)
        processed_team_stats_df.to_sql(self.processed_player_stars_table_name, self.engine, if_exists='append')

    def predict_player_stat(self):
        #reshaped_numpy_array = numpy.reshape(self.latest_predictor_numpy_array, 3,1)
        probability_in_numpy_array = self.poisson.predict(self.pos_result.params, self.latest_predictor_numpy_array)
        return {self.player_name: probability_in_numpy_array}
Beispiel #45
0
 def _train_model(self, predictors, y_array):
     y_1darray = numpy.squeeze(y_array)
     self.poisson = Poisson(y_1darray, predictors)
     self.pos_result = self.poisson.fit(method='bfgs')
Beispiel #46
0
class ZeroInflatedPoisson(GenericZeroInflated):
    __doc__ = """
    Poisson Zero Inflated model for count data

    %(params)s
    %(extra_params)s

    Attributes
    -----------
    endog : array
        A reference to the endogenous response variable
    exog : array
        A reference to the exogenous design.
    exog_infl: array
        A reference to the zero-inflated exogenous design.
    """ % {'params' : base._model_params_doc,
           'extra_params' : _doc_zi_params + base._missing_param_doc}

    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
                 inflation='logit', missing='none', **kwargs):
        super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                                  inflation=inflation,
                                                  exog_infl=exog_infl,
                                                  exposure=exposure,
                                                  missing=missing, **kwargs)
        self.model_main = Poisson(self.endog, self.exog, offset=offset,
                                  exposure=exposure)
        self.distribution = zipoisson
        self.result_class = ZeroInflatedPoissonResults
        self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
        self.result_class_reg = L1ZeroInflatedPoissonResults
        self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper

    def _hessian_main(self, params):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        y = self.endog
        w = self.model_infl.predict(params_infl)
        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        score = self.score(params)
        zero_idx = np.nonzero(y == 0)[0]
        nonzero_idx = np.nonzero(y)[0]

        mu = self.model_main.predict(params_main)

        hess_arr = np.zeros((self.k_exog, self.k_exog))

        coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))

        #d2l/dp2
        for i in range(self.k_exog):
            for j in range(i, -1, -1):
                hess_arr[i, j] = ((
                    self.exog[zero_idx, i] * self.exog[zero_idx, j] *
                    mu[zero_idx] * (w[zero_idx] - 1) * (1 / coeff -
                    w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) /
                    coeff**2)).sum() - (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
                    self.exog[nonzero_idx, j]).sum())

        return hess_arr

    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
        params_infl = params[:self.k_inflate]
        params_main = params[self.k_inflate:]

        counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))

        if len(exog_infl.shape) < 2:
            transform = True
            w = np.atleast_2d(
                self.model_infl.predict(params_infl, exog_infl))[:, None]
        else:
            transform = False
            w = self.model_infl.predict(params_infl, exog_infl)[:, None]

        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
        mu = self.model_main.predict(params_main, exog,
            offset=offset)[:, None]
        result = self.distribution.pmf(counts, mu, w)
        return result[0] if transform else result

    def _get_start_params(self):
        start_params = self.model_main.fit(disp=0, method="nm").params
        start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
        return start_params
Beispiel #47
0
from statsmodels.formula.api import ols, glm, poisson
from statsmodels.discrete.discrete_model import Poisson

import statsmodels.stats.tests.test_anova as ttmod

test = ttmod.TestAnova3()
test.setupClass()

data = test.data.drop([0,1,2])
res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)", data).fit(use_t=False)

res_glm = glm("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)",
                        data).fit()

res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)", data).fit(cov_type='HC0')
res_poi_2 = poisson("Days ~ C(Weight) + C(Duration)", data).fit(cov_type='HC0')

print('\nOLS')
print(res_ols.wald_test_terms())
print('\nGLM')
print(res_glm.wald_test_terms(skip_single=False, combine_terms=['Duration', 'Weight']))
print('\nPoisson 1')
print(res_poi.wald_test_terms(skip_single=False, combine_terms=['Duration', 'Weight']))
print('\nPoisson 2')
print(res_poi_2.wald_test_terms(skip_single=False))

from statsmodels.discrete.discrete_model import NegativeBinomial
res_nb2 = NegativeBinomial.from_formula("Days ~ C(Weight) * C(Duration)", data).fit()
print('\nNegative Binomial nb2')
print(res_nb2.wald_test_terms(skip_single=False))
    def initialize(cls):
        from statsmodels.discrete.discrete_model import Poisson

        mod = Poisson.from_formula("Days ~ C(Duration, Sum)*C(Weight, Sum)", cls.data)
        cls.res = mod.fit(cov_type='HC0')
Beispiel #49
0
    nobs = 1000
    rvs = np.random.randn(nobs,6)
    data_exog = rvs
    data_exog = sm.add_constant(data_exog)
    xbeta = 1 + 0.1*rvs.sum(1)
    data_endog = np.random.poisson(np.exp(xbeta))
    #print data_endog

    modp = PoissonGMLE(data_endog, data_exog)
    resp = modp.fit()
    print resp.params
    print resp.bse


    from statsmodels.discrete.discrete_model import Poisson
    resdp = Poisson(data_endog, data_exog).fit()
    print '\ncompare with discretemod'
    print 'compare params'
    print resdp.params - resp.params
    print 'compare bse'
    print resdp.bse - resp.bse

    gmlp = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
    resgp = gmlp.fit()
    ''' this creates a warning, bug bse is double defined ???
    c:\josef\eclipsegworkspace\statsmodels-josef-experimental-gsoc\scikits\statsmodels\decorators.py:105: CacheWriteWarning: The attribute 'bse' cannot be overwritten
      warnings.warn(errmsg, CacheWriteWarning)
    '''
    print '\ncompare with GLM'
    print 'compare params'
    print resgp.params - resp.params