예제 #1
0
    def setup_class(cls):
        """Build shared fixtures: a GLM-Poisson fit, a discrete Poisson fit
        with an offset as benchmark, and a PoissonOffsetGMLE fit started
        near the benchmark estimates.
        """
        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs,6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog, prepend=False)
        xbeta = 1 + 0.1*rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        cls.res_glm = mod_glm.fit()

        #estimate generic MLE
        #cls.mod = PoissonGMLE(data_endog, data_exog)
        #res = cls.mod.fit()

        #create offset variable based on first exog
        # first fit without offset, only to obtain the coefficient used to
        # construct the offset from the first regressor
        cls.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)
        offset = cls.res_discrete.params[0] * data_exog[:,0]  #1d ???

        #estimate discretemod.Poisson as benchmark, now has offset
        cls.res_discrete = Poisson(data_endog, data_exog[:,1:],
                                    offset=offset).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        cls.res_glm = mod_glm.fit()

        #cls.res = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset).fit(start_params = np.ones(6)/2., method='nm')
        modo = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset)
        # start near the benchmark parameters so BFGS converges reliably
        cls.res = modo.fit(start_params = 0.9*cls.res_discrete.params,
                            method='bfgs', disp=0)
예제 #2
0
    def __init__(self):
        """Build fixtures: GLM-Poisson and discrete Poisson (with offset)
        benchmarks, plus a PoissonZiGMLE fit to compare against them.
        """
        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs, 6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog, prepend=False)
        xbeta = 1 + 0.1 * rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        #self.mod = PoissonGMLE(data_endog, data_exog)
        #res = self.mod.fit()

        #create offset variable based on first exog
        # first fit without offset, only to obtain the coefficient used to
        # construct the offset from the first regressor
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)
        offset = self.res_discrete.params[0] * data_exog[:, 0]  #1d ???

        #estimate discretemod.Poisson as benchmark, now has offset
        self.res_discrete = Poisson(data_endog,
                                    data_exog[:, 1:],
                                    offset=offset).fit(disp=0)

        # Note : ZI has one extra parameter
        # the trailing 10 is the start value for the extra inflation parameter
        self.res = PoissonZiGMLE(
            data_endog, data_exog[:, 1:], offset=offset).fit(
                start_params=np.r_[0.9 * self.res_discrete.params, 10],
                method='bfgs',
                disp=0)

        # precision (decimal places) used by the comparison tests
        self.decimal = 4
예제 #3
0
 def setupClass(cls):
     """Fit a Poisson model on the RandHIE data (constant appended last)
     and attach the stored reference results for comparison."""
     randhie = sm.datasets.randhie.load()
     design = sm.add_constant(randhie.exog, prepend=False)
     cls.res1 = Poisson(randhie.endog, design).fit(method='newton', disp=0)
     reference = RandHIE()
     reference.poisson()
     cls.res2 = reference
예제 #4
0
 def __init__(self,
              endog,
              exog,
              exog_infl=None,
              offset=None,
              exposure=None,
              inflation='logit',
              missing='none',
              **kwargs):
     """Zero-inflated Poisson model constructor.

     Delegates the generic zero-inflation setup to the base class, then
     configures the Poisson-specific pieces: the main count model, the
     zero-inflated Poisson distribution, and the result/wrapper classes.

     :param endog: dependent count variable
     :param exog: regressors for the count model
     :param exog_infl: regressors for the inflation model (optional)
     :param offset: offset passed to both base class and count model
     :param exposure: exposure passed to both base class and count model
     :param inflation: link for the inflation part ('logit' by default)
     :param missing: missing-data handling passed to the base class
     """
     super(ZeroInflatedPoisson, self).__init__(endog,
                                               exog,
                                               offset=offset,
                                               inflation=inflation,
                                               exog_infl=exog_infl,
                                               exposure=exposure,
                                               missing=missing,
                                               **kwargs)
     # plain Poisson handles the count part of the mixture
     self.model_main = Poisson(self.endog,
                               self.exog,
                               offset=offset,
                               exposure=exposure)
     self.distribution = zipoisson
     # result/wrapper classes used by fit and fit_regularized
     self.result_class = ZeroInflatedPoissonResults
     self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
     self.result_class_reg = L1ZeroInflatedPoissonResults
     self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper
예제 #5
0
    def setup_class(cls):
        """Simulate Poisson data with a two-group design and fit a Poisson
        model by BFGS; store data and results as class attributes.
        """
        expected_params = [1, 1, 0.5]
        np.random.seed(987123)
        nobs = 500
        exog = np.ones((nobs, 2))
        # second column is a group indicator: 0 for the first half, 1 after
        exog[:nobs // 2, 1] = 0
        # offset is used to create misspecification of the model
        # for predicted probabilities conditional moment test
        #offset = 0.5 * np.random.randn(nobs)
        #range_mix = 0.5
        #offset = -range_mix / 2 + range_mix * np.random.rand(nobs)
        offset = 0
        # note: the last element of expected_params is not used here
        mu_true = np.exp(exog.dot(expected_params[:-1]) + offset)

        # endog drawn with mean mu_true / 5 -- presumably deliberate
        # misspecification for the tests; confirm against the test methods
        endog_poi = np.random.poisson(mu_true / 5)
        # endog3 = distr.zigenpoisson.rvs(mu_true, 0,
        #                                2, 0.01, size=mu_true.shape)

        model_poi = Poisson(endog_poi, exog)
        res_poi = model_poi.fit(method='bfgs', maxiter=5000, maxfun=5000)
        cls.exog = exog
        cls.endog = endog_poi
        cls.res = res_poi
        cls.nobs = nobs
    def poisson_regression(self, endog, exog, clean_data="greedy"):
        """Run a Poisson regression of *endog* on *exog* (plus a constant)
        after cleaning the relevant columns, and return a QueryResult with
        the fit summary and a natural-language utterance."""
        sheet = self.map_column_to_sheet(endog)

        # remember the caller's column names for the utterance
        arg_endog = endog
        arg_exog = exog

        # prepare data: clean the union of requested columns, then split out
        columns = np.append(np.copy(exog), endog)
        cleaned = sheet.cleanData(columns, clean_data)
        exog = sm.add_constant(cleaned[exog])
        endog = cleaned[endog]

        fit = Poisson(endog, exog).fit()

        utterance = (
            "Here are the results of a Poisson regression with endogenous variables "
            f"{arg_endog} and exogenous variables {arg_exog}.\n{fit.summary()}"
        )

        return QueryResult(fit.summary(), utterance)
예제 #7
0
    def __init__(self):
        """Build fixtures: discrete Poisson and GLM-Poisson benchmarks, and a
        PoissonOffsetGMLE fit (Nelder-Mead) using an offset built from the
        first regressor's benchmark coefficient.
        """
        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs,6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog, prepend=False)
        xbeta = 1 + 0.1*rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        #estimate discretemod.Poisson as benchmark
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        #self.mod = PoissonGMLE(data_endog, data_exog)
        #res = self.mod.fit()
        # offset absorbs the first regressor's effect; remaining params are
        # estimated on the other columns
        offset = self.res_discrete.params[0] * data_exog[:,0]  #1d ???
        #self.res = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset).fit(start_params = np.ones(6)/2., method='nm')
        modo = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset)
        # start near the benchmark params (excluding the offset column)
        self.res = modo.fit(start_params = 0.9*self.res_discrete.params[1:],
                            method='nm', disp=0)
예제 #8
0
def test_netchop_improvement(key):
    """Poisson-regress ddf[key] on the simultaneous-method indicator
    (with constant), print the summary, and return the fit results."""
    endog = ddf[key].values
    design = add_constant(ddf.method_simultaneous)
    result = Poisson(endog, design).fit()
    print(result.summary())
    return result
예제 #9
0
 def setupClass(cls):
     """Fit a Poisson model on the RandHIE data and attach the stored
     reference results for comparison."""
     from results.results_discrete import RandHIE
     randhie = sm.datasets.randhie.load()
     design = sm.add_constant(randhie.exog)
     cls.res1 = Poisson(randhie.endog, design).fit(method='newton', disp=0)
     reference = RandHIE()
     reference.poisson()
     cls.res2 = reference
예제 #10
0
 def testSimulate(self):
     """Simulated Poisson data should have the right shapes and be
     approximately recoverable by a frequentist ML fit."""
     np.random.seed(123)
     true_beta = np.r_[1.1, 2.2, 3.3, 4.4]
     y, X = poisson.simulate(100, true_beta)
     self.assertEqual(X.shape, (100, 4))
     self.assertEqual(y.shape, (100, ))
     # the ML estimate should land within distance 2 of the true params
     estimate = Poisson(y, X).fit()
     self.assertLess(np.linalg.norm(true_beta - estimate.params, 2), 2.0)
예제 #11
0
    def setup_class(cls):
        """Simulate Poisson data (optionally overdispersed via cls.dispersed)
        and build full/reduced Poisson models for variable-addition tests.
        """
        # copy-paste except for model
        nobs, k_vars = 500, 5

        np.random.seed(786452)
        x = np.random.randn(nobs, k_vars)
        x[:, 0] = 1
        # two extra candidate regressors, not used in the data generation
        x2 = np.random.randn(nobs, 2)
        xx = np.column_stack((x, x2))

        if cls.dispersed:
            # unobserved heterogeneity in the log-mean creates overdispersion
            het = np.random.randn(nobs)
            y = np.random.poisson(np.exp(x.sum(1) * 0.5 + het))
            #y_mc = np.random.negative_binomial(np.exp(x.sum(1) * 0.5), 2)
        else:
            y = np.random.poisson(np.exp(x.sum(1) * 0.5))

        cls.exog_extra = x2
        cls.model_full = Poisson(y, xx)
        cls.model_drop = Poisson(y, x)
예제 #12
0
    def _initialize(cls):
        """Fixture: penalized Poisson with zero penalty weight should match
        a plain (unpenalized) Poisson fit."""
        y, x = cls.y, cls.x

        # unpenalized reference fit
        cls.res2 = Poisson(y, x).fit(disp=0)

        penalized = PoissonPenalized(y, x)
        penalized.pen_weight = 0  # zero weight -> same objective as plain MLE
        cls.res1 = penalized.fit(method='bfgs', maxiter=100, disp=0)

        cls.atol = 5e-6
예제 #13
0
    def setup_class(cls):
        """Fit the same binary-response model as GLM-Poisson and as discrete
        Poisson, and store both influence objects for comparison.
        """
        df = data_bin
        # NOTE: 'log_volumne' is the column's actual (misspelled) name in the
        # dataset; do not "fix" the string.
        mod = GLM(df['constrict'], df[['const', 'log_rate', 'log_volumne']],
                  family=families.Poisson())
        # attach_wls is needed so get_influence has the WLS results available
        res = mod.fit(attach_wls=True, atol=1e-10)
        from statsmodels.discrete.discrete_model import Poisson
        mod2 = Poisson(df['constrict'],
                       df[['const', 'log_rate', 'log_volumne']])
        res2 = mod2.fit(tol=1e-10)

        cls.infl0 = res.get_influence()
        cls.infl1 = res2.get_influence()
예제 #14
0
    def _initialize(cls):
        """Fixture: penalized Poisson on the full design versus an oracle
        Poisson fit restricted to the truly nonzero regressors."""
        y, x = cls.y, cls.x

        # oracle: fit only the first k_nonzero columns
        cls.res2 = Poisson(y, x[:, :cls.k_nonzero]).fit(disp=0)

        penalized = PoissonPenalized(y, x, penal=cls.penalty)
        penalized.pen_weight *= 1.5
        penalized.penal.tau = 0.05
        cls.res1 = penalized.fit(method='bfgs', maxiter=100, disp=0)

        # compare penalized params only on the oracle's coefficient range
        cls.exog_index = slice(None, cls.k_nonzero, None)

        cls.atol = 5e-3
예제 #15
0
    def setup_class(cls):
        """Fixture: Poisson marginal effects with dummy regressors, compared
        against stored Stata results."""
        # start from known-good params so convergence from the defaults is
        # not part of what this test checks
        start_params = [
            14.1709, 0.7085, -3.4548, -0.539, 3.2368, -7.9299, -5.0529
        ]
        result = Poisson(endog, exog).fit(start_params=start_params)
        cls.res = result
        cls.margeff = result.get_margeff(dummy=True)

        # Stata reference omits one row; compare only these positions
        cls.res1_slice = [0, 1, 2, 3, 5, 6]
        cls.res1 = res_stata.results_poisson_margins_dummy
예제 #16
0
    def setup_class(cls):
        """Fixture: Poisson continuous marginal effects, compared against
        stored Stata results."""
        # start from known-good params so convergence from the defaults is
        # not part of what this test checks
        start_params = [
            14.1709, 0.7085, -3.4548, -0.539, 3.2368, -7.9299, -5.0529
        ]
        result = Poisson(endog, exog).fit(start_params=start_params)
        #result = Poisson(endog, exog).fit(maxiter=100)
        cls.res = result
        cls.margeff = result.get_margeff()

        cls.rtol_fac = 1
        # compare the full parameter vector against the Stata reference
        cls.res1_slice = slice(None, None, None)
        cls.res1 = res_stata.results_poisson_margins_cont
예제 #17
0
    def __init__(self):
        """Build fixtures: discrete Poisson and GLM-Poisson benchmarks, and a
        generic PoissonGMLE fit started near the benchmark estimates.
        """
        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs,6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog, prepend=False)
        xbeta = 0.1 + 0.1*rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        #estimate discretemod.Poisson as benchmark
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        # Nelder-Mead from 0.9x the benchmark params
        self.mod = PoissonGMLE(data_endog, data_exog)
        self.res = self.mod.fit(start_params=0.9 * self.res_discrete.params,
                                method='nm', disp=0)
예제 #18
0
    def fit(self, rs: RecordSet) -> None:
        """Fit a regularized Poisson regression model.

        Splits the record set into patterns (all columns but the last) and
        the output column, optionally prepends an intercept column, and
        stores the regularized fit results in ``self.model``.

        :param rs: The record set to fit with.
        :raises ValueError: If ``self.alpha`` is zero, which would make the
            regularized fit unreliable.
        """
        # keep our own copy so later mutation of rs cannot affect the fit
        self.data = cp.deepcopy(rs)
        patterns = self.data.entries[:, :-1]
        out = self.data.entries[:, -1:]

        if self.add_intercept:
            intercept = np.ones((patterns.shape[0], 1))
            patterns = np.hstack((intercept, patterns))

        # guard against an unusable regularization strength
        # (was: generic Exception with a message about "Probit" although the
        # model fitted here is Poisson)
        if self.alpha == 0:
            raise ValueError(
                "Alpha for Poisson too low to obtain reliable results")

        self.model = Poisson(endog=out.ravel(), exog=patterns)
        # maxiter must be an integer iteration count (10e8 was a float)
        self.model = self.model.fit_regularized(alpha=self.alpha,
                                                maxiter=10 ** 9,
                                                disp=False)
예제 #19
0
    def test_spec_tests(self):
        """Regression test for Poisson specification diagnostics.

        The expected arrays are pinned regression numbers that were similar
        to a Monte Carlo simulation when recorded; they are not analytic
        values.
        """
        # regression test, numbers similar to Monte Carlo simulation
        res_dispersion = np.array([[0.1396096387543, 0.8889684245877],
                                   [0.1396096387543, 0.8889684245877],
                                   [0.2977840351238, 0.7658680002106],
                                   [0.1307899995877, 0.8959414342111],
                                   [0.1307899995877, 0.8959414342111],
                                   [0.1357101381056, 0.8920504328246],
                                   [0.2776587511235, 0.7812743277372]])

        # expected (statistic, pvalue) rows for the zero-inflation tests below
        res_zi = np.array([
            [00.1389582826821, 0.7093188241734],
            [-0.3727710861669, 0.7093188241734],
            [-0.2496729648642, 0.8028402670888],
            [00.0601651553909, 0.8062350958880],
        ])

        respoi = Poisson(self.endog, self.exog).fit(disp=0)
        dia = PoissonDiagnostic(respoi)
        # all dispersion test variants at once
        t_disp = dia.test_dispersion()[0]
        assert_allclose(t_disp, res_dispersion, rtol=1e-8)

        nobs = self.endog.shape[0]
        # zero-inflation tests: broek with explicit exog_infl, broek default,
        # probability-based, and chisquare over the first two count bins
        t_zi_jh = dia.test_poisson_zeroinflation(method="broek",
                                                 exog_infl=np.ones(nobs))
        t_zib = dia.test_poisson_zeroinflation(method="broek")
        t_zim = dia.test_poisson_zeroinflation(method="prob")
        t_zichi2 = dia.test_chisquare_prob(bin_edges=np.arange(3))

        t_zi = np.vstack([t_zi_jh[:2], t_zib[:2], t_zim[:2], t_zichi2[:2]])
        assert_allclose(t_zi, res_zi, rtol=1e-8)

        # test jansakul and hinde with exog_infl
        t_zi_ex = dia.test_poisson_zeroinflation(method="broek",
                                                 exog_infl=self.exog)
        res_zi_ex = np.array([3.7813218150779, 0.1509719973257])
        assert_allclose(t_zi_ex[:2], res_zi_ex, rtol=1e-8)
예제 #20
0
def test_poisson_screening():
    """Variable screening should recover the true nonzero regressors and
    match the oracle Poisson fit's parameters.
    """
    np.random.seed(987865)

    y, x, idx_nonzero_true, beta = _get_poisson_data()
    nobs = len(y)

    xnames_true = ['var%4d' % ii for ii in idx_nonzero_true]
    xnames_true[0] = 'const'
    parameters = pd.DataFrame(beta[idx_nonzero_true],
                              index=xnames_true,
                              columns=['true'])

    # oracle fit: uses only the truly nonzero columns
    xframe_true = pd.DataFrame(x[:, idx_nonzero_true], columns=xnames_true)
    res_oracle = Poisson(y, xframe_true).fit()
    parameters['oracle'] = res_oracle.params

    # screening starts from a constant-only penalized model
    mod_initial = PoissonPenalized(y, np.ones(nobs), pen_weight=nobs * 5)

    screener = VariableScreening(mod_initial)
    exog_candidates = x[:, 1:]
    res_screen = screener.screen_exog(exog_candidates, maxiter=10)

    # screening must select exactly the true nonzero set
    assert_equal(np.sort(res_screen.idx_nonzero), idx_nonzero_true)

    xnames = ['var%4d' % ii for ii in res_screen.idx_nonzero]
    xnames[0] = 'const'

    # smoke test
    res_screen.results_final.summary(xname=xnames)
    res_screen.results_pen.summary()
    assert_equal(res_screen.results_final.mle_retvals['converged'], True)

    # align final and oracle parameters by name before comparing
    ps = pd.Series(res_screen.results_final.params, index=xnames, name='final')
    parameters = parameters.join(ps, how='outer')

    assert_allclose(parameters['oracle'], parameters['final'], atol=5e-6)
예제 #21
0
# Simulated sparse design: only the first k_nonzero regressors matter.
nobs, k_vars = 500, 20
k_nonzero = 4
x = (np.random.rand(nobs, k_vars) + 0.5 *
     (np.random.rand(nobs, 1) - 0.5)) * 2 - 1
x *= 1.2
x[:, 0] = 1  # first column is the constant
beta = np.zeros(k_vars)
beta[:k_nonzero] = 1. / np.arange(1, k_nonzero + 1)
linpred = x.dot(beta)
mu = np.exp(linpred)
y = np.random.poisson(mu)
import os
# BUG FIX: raw_input() is Python 2 only and raises NameError on Python 3;
# the rest of this script already uses Python 3 print(), so use input().
debug = input("please attach to pid:{},then press any key".format(
    os.getpid()))
# plain Poisson fit as a baseline
modp = Poisson(y, x)
resp = modp.fit()
print(resp.params)

# penalized fit on the full design
mod = PoissonPenalized(y, x)
res = mod.fit(method='bfgs', maxiter=1000)
print(res.params)

############### Penalized Probit
# binarize a noisy latent variable to build a Probit example
y_star = linpred + 0.25 * np.random.randn(nobs)
y2 = (y_star > 0.75).astype(float)
y_star.mean(), y2.mean()

res0 = Probit(y2, x).fit()
print(res0.summary())
# oracle Probit: only the truly nonzero regressors
res_oracle = Probit(y2, x[:, :k_nonzero]).fit()
예제 #22
0
    # NOTE(review): fragment of a larger function; Y and X come from the
    # enclosing scope -- confirm their types upstream.
    X = sm.add_constant(X)

    # general OLS
    # https://www.statsmodels.org/stable/generated/statsmodels.regression.linear_model.OLS.html
    # model=sm.OLS(Y, X.astype(float))

    # robust regression
    # https://www.statsmodels.org/stable/generated/statsmodels.robust.robust_linear_model.RLM.html
    # model=sm.RLM(Y, X.astype(float))

    # probit model
    # https://www.statsmodels.org/stable/generated/statsmodels.discrete.discrete_model.Probit.html
    # model = Probit(Y, X.astype(float))

    # logit model
    # https://www.statsmodels.org/stable/generated/statsmodels.discrete.discrete_model.Logit.html
    # model = Logit(Y, X.astype(float))

    # poisson model
    # https://www.statsmodels.org/stable/generated/statsmodels.formula.api.poisson.html
    model = Poisson(Y, X.astype(float))

    final_model = model.fit()
    results_summary = final_model.summary()
    print(results_summary)
    # export the coefficient table (tables[1]) via HTML into a DataFrame
    results_as_html = results_summary.tables[1].as_html()
    result_df = pd.read_html(results_as_html, header=0, index_col=0)[0]

    print(result_df.to_latex())
예제 #23
0
#!/usr/bin/env python
# coding: utf-8

# In[ ]:


# Poisson regression of infection counts on population density.
from statsmodels.discrete.discrete_model import Poisson
model =Poisson(endog=doi.Infections.astype(float), exog=add_constant(doi.CYPOPDENS.astype(float))) #Endog is the dependent variable here
results = model.fit()
print(results.summary())  


# In[ ]:


# NOTE(review): DENSCOEF already applies np.exp to the coefficient, yet the
# print below exponentiates DENSCOEF again -- looks like a double
# exponentiation; confirm the intended transformation.
DENSCOEF = 1 - np.exp(.0007)    #.0007 is the coefficient of our endogenous variable of interest
print('CYPOPDENS coefficent exponetiated: {} '.format(np.exp(DENSCOEF)))  #outputs workable percentage
예제 #24
0
# baseline: sklearn intercept for comparison with the OLS fit below
clf.intercept_

# ordinary least squares on the same data (constant added)
model = OLS(y, add_constant(x))
model_fit = model.fit()
model_fit.summary()

def estimator(x, row_in='Crashes'):
    """Return the negative product of per-row Poisson likelihoods for the
    parameter vector x, writing 'estimated' and 'probability' columns onto
    the module-level df as a side effect."""
    def expected(row):
        return exp(x[0] + x[1] * row['AADT'] + x[2] * row['L'])

    df['estimated'] = df.apply(expected, axis=1)
    # scipy's poisson.pmf replaces the manual formula:
    # (mu**k * exp(-mu)) / factorial(k)
    df['probability'] = df.apply(
        lambda row: poisson.pmf(row[row_in], row['estimated']), axis=1)
    likelihood = df['probability'].product()
    return -likelihood


# minimize the negative likelihood from the hand-rolled estimator first
x0 = [1.6, .0000026, .032]
estimator(x0)
optimize.minimize(estimator,
                  x0,
                  method='nelder-mead',
                  options={
                      'xtol': 1e-8,
                      'disp': True
                  })

# BUG FIX: Series/DataFrame.as_matrix() was deprecated in pandas 0.23 and
# removed in 1.0; to_numpy() is the supported replacement.
model = Poisson(y.to_numpy().transpose(), add_constant(x))
model_fit = model.fit(start_params=x0)
model_fit.summary()
import numpy as np

# distribution of the response counts
np.unique(V, return_counts=True)

# In[84]:

import statsmodels

# add an intercept column to the design matrix
U_Const = statsmodels.tools.add_constant(U)

# In[85]:

from statsmodels.discrete.discrete_model import Poisson

# discrete Poisson fit
mpr = Poisson(V, U_Const)
res_mpr = mpr.fit()

# In[93]:

from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families

# same model as a GLM with Poisson family; should agree with the fit above
mod = GLM(V, U_Const, family=families.Poisson())
res = mod.fit()
print(res.summary())

# ### Overdispersion

# In[95]:
예제 #26
0
 def _train_model(self, predictors, y_array):
     """Fit a Poisson regression of the (squeezed) response on predictors
     and store both the model and the BFGS fit results."""
     response = numpy.squeeze(y_array)
     self.poisson = Poisson(response, predictors)
     self.pos_result = self.poisson.fit(method='bfgs')
예제 #27
0
    "station_diur_temp_rng_c", "precipitation_amt_mm",
    "reanalysis_dew_point_temp_k", "reanalysis_air_temp_k",
    "reanalysis_relative_humidity_percent",
    "reanalysis_specific_humidity_g_per_kg", "reanalysis_precip_amt_kg_per_m2",
    "reanalysis_max_air_temp_k", "reanalysis_min_air_temp_k",
    "reanalysis_avg_temp_k", "reanalysis_tdtr_k", "ndvi_se", "ndvi_sw",
    "ndvi_ne", "ndvi_nw"
]
n_features = len(features_list)

# mean-impute missing values in both splits
# NOTE(review): the test split is imputed with its own means, not the
# training means -- confirm this is intended.
df_train_features = df_train_features.fillna(df_train_features.mean())
df_test_features = df_test_features.fillna(df_test_features.mean())

X_train = df_train_features[features_list].values
X_test = df_test_features[features_list].values

y_train = df_train_labels["total_cases"].values

# Model:
poisson_mod = Poisson(endog=y_train, exog=X_train).fit(maxiter=61)

print(poisson_mod.summary())

# predict counts for the test split and round to integers for submission
predictions = poisson_mod.predict(X_test)
predictions_rounded = np.rint(predictions).astype(np.int64)
print(predictions_rounded)

write_result(predictions_rounded,
             "/poisson.csv",
             sample_source=sample_submission_path,
             write_source=predictions_path)
예제 #28
0
 def fit(self):
     """Scale the design matrix and fit a Poisson model on the result."""
     # with_mean=False: scale by variance only, no mean centering
     self.scaler = StandardScaler(with_mean=False)
     self.X = self.scaler.fit_transform(self.X)
     # fit Poisson model to the scaled data
     self.poisson_model = Poisson(self.y, self.X).fit()