def setup_class(cls):
    """Build artificial Poisson data and reference fits shared by the tests.

    Creates a GLM-Poisson benchmark (``res_glm``), an offset-based discrete
    Poisson benchmark (``res_discrete``), and the generic offset MLE under
    test (``res``).
    """
    # generate artificial data (fixed seed for reproducibility)
    np.random.seed(98765678)
    nobs = 200
    rvs = np.random.randn(nobs, 6)
    data_exog = sm.add_constant(rvs, prepend=False)
    xbeta = 1 + 0.1 * rvs.sum(1)
    data_endog = np.random.poisson(np.exp(xbeta))

    # GLM Poisson benchmark.  NOTE: the original built and fit this model
    # twice with identical inputs; one deterministic IRLS fit is enough.
    mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
    cls.res_glm = mod_glm.fit()

    # create offset variable based on first exog
    cls.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)
    offset = cls.res_discrete.params[0] * data_exog[:, 0]  # 1d ???

    # estimate discretemod.Poisson as benchmark, now has offset
    cls.res_discrete = Poisson(data_endog, data_exog[:, 1:],
                               offset=offset).fit(disp=0)

    # generic MLE with offset, started near the discrete benchmark params
    modo = PoissonOffsetGMLE(data_endog, data_exog[:, 1:], offset=offset)
    cls.res = modo.fit(start_params=0.9 * cls.res_discrete.params,
                       method='bfgs', disp=0)
def __init__(self):
    # Reproducible artificial Poisson data.
    np.random.seed(98765678)
    n_obs = 200
    draws = np.random.randn(n_obs, 6)
    exog_mat = sm.add_constant(draws, prepend=False)
    lin_pred = 1 + 0.1 * draws.sum(1)
    endog_counts = np.random.poisson(np.exp(lin_pred))

    # GLM-Poisson benchmark.
    glm_model = sm.GLM(endog_counts, exog_mat, family=sm.families.Poisson())
    self.res_glm = glm_model.fit()

    # First discrete Poisson fit supplies the offset from the first column.
    self.res_discrete = Poisson(endog_counts, exog_mat).fit(disp=0)
    offset = self.res_discrete.params[0] * exog_mat[:, 0]  # 1d ???

    # Discrete Poisson benchmark, now with offset.
    self.res_discrete = Poisson(endog_counts, exog_mat[:, 1:],
                                offset=offset).fit(disp=0)

    # Note: the zero-inflated model carries one extra parameter.
    zi_model = PoissonZiGMLE(endog_counts, exog_mat[:, 1:], offset=offset)
    start = np.r_[0.9 * self.res_discrete.params, 10]
    self.res = zi_model.fit(start_params=start, method='bfgs', disp=0)
    self.decimal = 4
def setupClass(cls):
    # res1: Newton-fitted Poisson on the RandHIE data;
    # res2: stored benchmark results for comparison.
    dataset = sm.datasets.randhie.load()
    design = sm.add_constant(dataset.exog, prepend=False)
    cls.res1 = Poisson(dataset.endog, design).fit(method='newton', disp=0)
    benchmark = RandHIE()
    benchmark.poisson()
    cls.res2 = benchmark
def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
             inflation='logit', missing='none', **kwargs):
    """Zero-inflated Poisson model.

    Delegates the shared zero-inflation setup to the base class, then wires
    in the Poisson-specific components: the count model, the zero-inflated
    distribution, and the result/wrapper classes used by ``fit`` and
    ``fit_regularized``.
    """
    super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
                                              inflation=inflation,
                                              exog_infl=exog_infl,
                                              exposure=exposure,
                                              missing=missing, **kwargs)
    # Count component: plain Poisson on the (base-class-processed) data.
    self.model_main = Poisson(self.endog, self.exog, offset=offset,
                              exposure=exposure)
    self.distribution = zipoisson
    # Result classes for the plain fit ...
    self.result_class = ZeroInflatedPoissonResults
    self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
    # ... and for the L1-regularized fit.
    self.result_class_reg = L1ZeroInflatedPoissonResults
    self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper
def setup_class(cls):
    true_params = [1, 1, 0.5]
    np.random.seed(987123)
    n = 500
    design = np.ones((n, 2))
    design[:n // 2, 1] = 0
    # offset is used to create misspecification of the model
    # for predicted probabilities conditional moment test
    offset = 0
    mu_true = np.exp(design.dot(true_params[:-1]) + offset)
    counts = np.random.poisson(mu_true / 5)

    result = Poisson(counts, design).fit(method='bfgs', maxiter=5000,
                                         maxfun=5000)

    cls.exog = design
    cls.endog = counts
    cls.res = result
    cls.nobs = n
def poisson_regression(self, endog, exog, clean_data="greedy"):
    """Run a Poisson regression of ``endog`` on ``exog`` and report it.

    Parameters
    ----------
    endog : column name of the dependent variable.
    exog : column name(s) of the explanatory variables.
    clean_data : cleaning strategy passed through to ``cleanData``.

    Returns
    -------
    QueryResult wrapping the fitted model's summary and a narrated message.
    """
    sheet = self.map_column_to_sheet(endog)
    arg_endog, arg_exog = endog, exog

    # prepare data: clean all involved columns together, then split into
    # design matrix (with constant) and response
    cols = np.append(np.copy(exog), endog)
    df_clean = sheet.cleanData(cols, clean_data)
    exog_mat = sm.add_constant(df_clean[exog])
    endog_vec = df_clean[endog]

    fit = Poisson(endog_vec, exog_mat).fit()
    # Compute the summary once; the original called fit.summary() twice.
    summary = fit.summary()

    utterance = (
        "Here are the results of a Poisson regression with endogenous variables "
        f"{arg_endog} and exogenous variables {arg_exog}.\n{summary}"
    )
    return QueryResult(summary, utterance)
def __init__(self):
    # Reproducible artificial Poisson data.
    np.random.seed(98765678)
    n_obs = 200
    draws = np.random.randn(n_obs, 6)
    exog_mat = sm.add_constant(draws, prepend=False)
    lin_pred = 1 + 0.1 * draws.sum(1)
    endog_counts = np.random.poisson(np.exp(lin_pred))

    # Benchmarks: discrete Poisson and GLM-Poisson.
    self.res_discrete = Poisson(endog_counts, exog_mat).fit(disp=0)
    glm_model = sm.GLM(endog_counts, exog_mat, family=sm.families.Poisson())
    self.res_glm = glm_model.fit()

    # Offset built from the first exog column and its fitted coefficient.
    offset = self.res_discrete.params[0] * exog_mat[:, 0]  # 1d ???

    # Generic offset MLE, started near the benchmark's remaining params.
    offset_model = PoissonOffsetGMLE(endog_counts, exog_mat[:, 1:],
                                     offset=offset)
    self.res = offset_model.fit(
        start_params=0.9 * self.res_discrete.params[1:],
        method='nm', disp=0)
def test_netchop_improvement(key):
    # Poisson regression of the selected column on the simultaneous-method
    # indicator (plus constant); prints and returns the fit.
    design = add_constant(ddf.method_simultaneous)
    result = Poisson(ddf[key].values, design).fit()
    print(result.summary())
    return result
def setupClass(cls):
    from results.results_discrete import RandHIE

    # res1: Newton-fitted Poisson on RandHIE (constant prepended here,
    # unlike the prepend=False variant elsewhere); res2: stored benchmark.
    dataset = sm.datasets.randhie.load()
    design = sm.add_constant(dataset.exog)
    cls.res1 = Poisson(dataset.endog, design).fit(method='newton', disp=0)
    benchmark = RandHIE()
    benchmark.poisson()
    cls.res2 = benchmark
def testSimulate(self):
    np.random.seed(123)
    true_beta = np.r_[1.1, 2.2, 3.3, 4.4]
    y, X = poisson.simulate(100, true_beta)

    # Simulated design and response must have the advertised shapes.
    self.assertEqual(X.shape, (100, 4))
    self.assertEqual(y.shape, (100, ))

    # try to recover params using frequentist regression
    recovered = Poisson(y, X).fit().params
    self.assertLess(np.linalg.norm(true_beta - recovered, 2), 2.0)
def setup_class(cls):
    # copy-paste except for model
    nobs, k_vars = 500, 5
    np.random.seed(786452)
    base = np.random.randn(nobs, k_vars)
    base[:, 0] = 1
    extra = np.random.randn(nobs, 2)
    full_design = np.column_stack((base, extra))

    linpred = base.sum(1) * 0.5
    if cls.dispersed:
        # heterogeneity term induces overdispersion
        het = np.random.randn(nobs)
        y = np.random.poisson(np.exp(linpred + het))
    else:
        y = np.random.poisson(np.exp(linpred))

    cls.exog_extra = extra
    cls.model_full = Poisson(y, full_design)
    cls.model_drop = Poisson(y, base)
def _initialize(cls):
    y, x = cls.y, cls.x
    # Unpenalized reference fit.
    cls.res2 = Poisson(y, x).fit(disp=0)
    # Penalized model with zero penalty weight should reproduce it.
    penalized = PoissonPenalized(y, x)
    penalized.pen_weight = 0
    cls.res1 = penalized.fit(method='bfgs', maxiter=100, disp=0)
    cls.atol = 5e-6
def setup_class(cls):
    from statsmodels.discrete.discrete_model import Poisson

    frame = data_bin
    design = frame[['const', 'log_rate', 'log_volumne']]
    response = frame['constrict']

    # Same Poisson fit two ways: GLM (with WLS attached) and discrete model.
    glm_res = GLM(response, design, family=families.Poisson()).fit(
        attach_wls=True, atol=1e-10)
    discrete_res = Poisson(response, design).fit(tol=1e-10)

    cls.infl0 = glm_res.get_influence()
    cls.infl1 = discrete_res.get_influence()
def _initialize(cls):
    y, x = cls.y, cls.x
    # Oracle fit on the truly nonzero regressors only.
    oracle = Poisson(y, x[:, :cls.k_nonzero])
    cls.res2 = oracle.fit(disp=0)
    # Penalized fit on the full design, with boosted penalty weight.
    penalized = PoissonPenalized(y, x, penal=cls.penalty)
    penalized.pen_weight *= 1.5
    penalized.penal.tau = 0.05
    cls.res1 = penalized.fit(method='bfgs', maxiter=100, disp=0)
    cls.exog_index = slice(None, cls.k_nonzero, None)
    cls.atol = 5e-3
def setup_class(cls):
    # here we don't need to check convergence from default start_params
    warm_start = [
        14.1709, 0.7085, -3.4548, -0.539, 3.2368, -7.9299, -5.0529
    ]
    result = Poisson(endog, exog).fit(start_params=warm_start)
    cls.res = result
    cls.margeff = result.get_margeff(dummy=True)
    cls.res1_slice = [0, 1, 2, 3, 5, 6]
    cls.res1 = res_stata.results_poisson_margins_dummy
def setup_class(cls):
    # here we don't need to check convergence from default start_params
    warm_start = [
        14.1709, 0.7085, -3.4548, -0.539, 3.2368, -7.9299, -5.0529
    ]
    result = Poisson(endog, exog).fit(start_params=warm_start)
    cls.res = result
    cls.margeff = result.get_margeff()
    cls.rtol_fac = 1
    cls.res1_slice = slice(None, None, None)
    cls.res1 = res_stata.results_poisson_margins_cont
def __init__(self):
    # Reproducible artificial Poisson data.
    np.random.seed(98765678)
    n_obs = 200
    draws = np.random.randn(n_obs, 6)
    exog_mat = sm.add_constant(draws, prepend=False)
    lin_pred = 0.1 + 0.1 * draws.sum(1)
    endog_counts = np.random.poisson(np.exp(lin_pred))

    # Benchmarks: discrete Poisson and GLM-Poisson.
    self.res_discrete = Poisson(endog_counts, exog_mat).fit(disp=0)
    self.res_glm = sm.GLM(endog_counts, exog_mat,
                          family=sm.families.Poisson()).fit()

    # Generic MLE, started near the discrete benchmark estimates.
    self.mod = PoissonGMLE(endog_counts, exog_mat)
    self.res = self.mod.fit(start_params=0.9 * self.res_discrete.params,
                            method='nm', disp=0)
def fit(self, rs: RecordSet) -> None:
    """
    Fit a regularized Poisson regression model.

    (Previous docstring said "Probit"; the model constructed below is
    statsmodels' Poisson.)

    :param rs: The record set to fit with.
    """
    # set params
    self.data = cp.deepcopy(rs)
    patterns = self.data.entries[:, :-1]
    out = self.data.entries[:, -1:]
    if self.add_intercept:
        intercept = np.ones((patterns.shape[0], 1))
        patterns = np.hstack((intercept, patterns))

    # avoid error: zero regularization gives unreliable results here
    if self.alpha == 0:
        raise Exception("Alpha Probit too low to obtain reliable results")

    self.model = Poisson(endog=out.ravel(), exog=patterns)
    # maxiter is an iteration count; the original passed the float 10e8
    self.model = self.model.fit_regularized(alpha=self.alpha,
                                            maxiter=int(1e9),
                                            disp=False)
def test_spec_tests(self):
    # regression test, numbers similar to Monte Carlo simulation
    # Expected (statistic, pvalue) rows for the dispersion tests.
    res_dispersion = np.array([[0.1396096387543, 0.8889684245877],
                               [0.1396096387543, 0.8889684245877],
                               [0.2977840351238, 0.7658680002106],
                               [0.1307899995877, 0.8959414342111],
                               [0.1307899995877, 0.8959414342111],
                               [0.1357101381056, 0.8920504328246],
                               [0.2776587511235, 0.7812743277372]])
    # Expected (statistic, pvalue) rows for the zero-inflation tests below.
    res_zi = np.array([
        [00.1389582826821, 0.7093188241734],
        [-0.3727710861669, 0.7093188241734],
        [-0.2496729648642, 0.8028402670888],
        [00.0601651553909, 0.8062350958880],
        ])

    respoi = Poisson(self.endog, self.exog).fit(disp=0)
    dia = PoissonDiagnostic(respoi)

    # Dispersion diagnostics: compare all variants against stored values.
    t_disp = dia.test_dispersion()[0]
    assert_allclose(t_disp, res_dispersion, rtol=1e-8)

    nobs = self.endog.shape[0]
    # Zero-inflation diagnostics: broek with/without explicit exog_infl,
    # the "prob" variant, and the chi-square probability test.
    t_zi_jh = dia.test_poisson_zeroinflation(method="broek",
                                             exog_infl=np.ones(nobs))
    t_zib = dia.test_poisson_zeroinflation(method="broek")
    t_zim = dia.test_poisson_zeroinflation(method="prob")
    t_zichi2 = dia.test_chisquare_prob(bin_edges=np.arange(3))
    t_zi = np.vstack([t_zi_jh[:2], t_zib[:2], t_zim[:2], t_zichi2[:2]])
    assert_allclose(t_zi, res_zi, rtol=1e-8)

    # test jansakul and hinde with exog_infl
    t_zi_ex = dia.test_poisson_zeroinflation(method="broek",
                                             exog_infl=self.exog)
    res_zi_ex = np.array([3.7813218150779, 0.1509719973257])
    assert_allclose(t_zi_ex[:2], res_zi_ex, rtol=1e-8)
def test_poisson_screening():
    # Variable screening on simulated Poisson data: the screener should
    # recover exactly the true nonzero regressors, and the final fit should
    # match the oracle fit on those regressors.
    np.random.seed(987865)
    y, x, idx_nonzero_true, beta = _get_poisson_data()
    nobs = len(y)

    # Oracle: fit only the truly nonzero columns, with readable names.
    xnames_true = ['var%4d' % ii for ii in idx_nonzero_true]
    xnames_true[0] = 'const'
    parameters = pd.DataFrame(beta[idx_nonzero_true], index=xnames_true,
                              columns=['true'])
    xframe_true = pd.DataFrame(x[:, idx_nonzero_true], columns=xnames_true)
    res_oracle = Poisson(y, xframe_true).fit()
    parameters['oracle'] = res_oracle.params

    # Screening starts from a constant-only penalized model.
    mod_initial = PoissonPenalized(y, np.ones(nobs), pen_weight=nobs * 5)
    screener = VariableScreening(mod_initial)
    exog_candidates = x[:, 1:]
    res_screen = screener.screen_exog(exog_candidates, maxiter=10)

    # Selected indices must match the truth exactly.
    assert_equal(np.sort(res_screen.idx_nonzero), idx_nonzero_true)

    xnames = ['var%4d' % ii for ii in res_screen.idx_nonzero]
    xnames[0] = 'const'

    # smoke test
    res_screen.results_final.summary(xname=xnames)
    res_screen.results_pen.summary()
    assert_equal(res_screen.results_final.mle_retvals['converged'], True)

    # Final screened estimates should agree with the oracle estimates.
    ps = pd.Series(res_screen.results_final.params, index=xnames,
                   name='final')
    parameters = parameters.join(ps, how='outer')
    assert_allclose(parameters['oracle'], parameters['final'], atol=5e-6)
# Simulate a sparse Poisson design: k_nonzero true coefficients out of k_vars,
# mildly correlated uniform regressors scaled to roughly [-1.2, 1.2].
nobs, k_vars = 500, 20
k_nonzero = 4
x = (np.random.rand(nobs, k_vars) + 0.5 * (np.random.rand(nobs, 1) - 0.5)) * 2 - 1
x *= 1.2
x[:, 0] = 1
beta = np.zeros(k_vars)
beta[:k_nonzero] = 1. / np.arange(1, k_nonzero + 1)
linpred = x.dot(beta)
mu = np.exp(linpred)
y = np.random.poisson(mu)

import os
# Pause so a debugger can attach to this process.
# Fix: raw_input() is Python 2 only; this script otherwise uses Python 3
# print() calls, so use the Python 3 builtin input().
debug = input("please attach to pid:{},then press any key".format(
    os.getpid()))

# Plain Poisson vs. penalized Poisson on the full design.
modp = Poisson(y, x)
resp = modp.fit()
print(resp.params)

mod = PoissonPenalized(y, x)
res = mod.fit(method='bfgs', maxiter=1000)
print(res.params)

############### Penalized Probit
# Binary outcome from a noisy latent index over the same linear predictor.
y_star = linpred + 0.25 * np.random.randn(nobs)
y2 = (y_star > 0.75).astype(float)
y_star.mean(), y2.mean()
res0 = Probit(y2, x).fit()
print(res0.summary())
res_oracle = Probit(y2, x[:, :k_nonzero]).fit()
X = sm.add_constant(X) # general OLS # https://www.statsmodels.org/stable/generated/statsmodels.regression.linear_model.OLS.html # model=sm.OLS(Y, X.astype(float)) # robust regression # https://www.statsmodels.org/stable/generated/statsmodels.robust.robust_linear_model.RLM.html # model=sm.RLM(Y, X.astype(float)) # probit model # https://www.statsmodels.org/stable/generated/statsmodels.discrete.discrete_model.Probit.html # model = Probit(Y, X.astype(float)) # logit model # https://www.statsmodels.org/stable/generated/statsmodels.discrete.discrete_model.Logit.html # model = Logit(Y, X.astype(float)) # poisson model # https://www.statsmodels.org/stable/generated/statsmodels.formula.api.poisson.html model = Poisson(Y, X.astype(float)) final_model = model.fit() results_summary = final_model.summary() print(results_summary) results_as_html = results_summary.tables[1].as_html() result_df = pd.read_html(results_as_html, header=0, index_col=0)[0] print(result_df.to_latex())
#!/usr/bin/env python # coding: utf-8 # In[ ]: from statsmodels.discrete.discrete_model import Poisson model =Poisson(endog=doi.Infections.astype(float), exog=add_constant(doi.CYPOPDENS.astype(float))) #Endog is the dependent variable here results = model.fit() print(results.summary()) # In[ ]: DENSCOEF = 1 - np.exp(.0007) #.0007 is the coefficient of our endogenous variable of interest print('CYPOPDENS coefficent exponetiated: {} '.format(np.exp(DENSCOEF))) #outputs workable percentage
clf.intercept_

# OLS baseline on the same design (summary computed for inspection only).
model = OLS(y, add_constant(x))
model_fit = model.fit()
model_fit.summary()


def estimator(x, row_in='Crashes'):
    """Negative product of per-row Poisson likelihoods for parameters x.

    x[0] is the intercept; x[1], x[2] multiply 'AADT' and 'L'.
    NOTE(review): maximizing a raw likelihood *product* underflows quickly;
    the usual approach is to minimize the negative log-likelihood instead.
    Side effect: writes 'estimated' and 'probability' columns into df.
    """
    estimated = lambda row: exp(x[0] + x[1] * row['AADT'] + x[2] * row['L'])
    df['estimated'] = df.apply(estimated, axis=1)
    #probability = lambda row: (row['estimated']**row[row_in] * exp(-row['estimated'])) / factorial(row[row_in])
    probability = lambda row: poisson.pmf(row[row_in], row['estimated'])
    df['probability'] = df.apply(probability, axis=1)
    product = df['probability'].product()
    return (-product)


x0 = [1.6, .0000026, .032]
estimator(x0)
optimize.minimize(estimator, x0, method='nelder-mead',
                  options={
                      'xtol': 1e-8,
                      'disp': True
                  })

# Fix: Series.as_matrix() was removed in pandas 0.25; .values is the
# long-supported equivalent and returns the same ndarray.
model = Poisson(y.values.transpose(), add_constant(x))
model_fit = model.fit(start_params=x0)
model_fit.summary()
import numpy as np

# Inspect the distribution of the response counts.
np.unique(V, return_counts=True)

# In[84]:

import statsmodels

# Add an intercept column to the design matrix U.
U_Const = statsmodels.tools.add_constant(U)

# In[85]:

from statsmodels.discrete.discrete_model import Poisson

# Discrete-model Poisson fit.
mpr = Poisson(V, U_Const)
res_mpr = mpr.fit()

# In[93]:

from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.genmod import families

# Same regression via the GLM interface, for comparison.
mod = GLM(V, U_Const, family=families.Poisson())
res = mod.fit()
print(res.summary())

# ### La surdispersion

# In[95]:
def _train_model(self, predictors, y_array):
    # Collapse the response to 1-D, then fit a Poisson model with BFGS.
    response = numpy.squeeze(y_array)
    self.poisson = Poisson(response, predictors)
    self.pos_result = self.poisson.fit(method='bfgs')
"station_diur_temp_rng_c", "precipitation_amt_mm", "reanalysis_dew_point_temp_k", "reanalysis_air_temp_k", "reanalysis_relative_humidity_percent", "reanalysis_specific_humidity_g_per_kg", "reanalysis_precip_amt_kg_per_m2", "reanalysis_max_air_temp_k", "reanalysis_min_air_temp_k", "reanalysis_avg_temp_k", "reanalysis_tdtr_k", "ndvi_se", "ndvi_sw", "ndvi_ne", "ndvi_nw" ] n_features = len(features_list) df_train_features = df_train_features.fillna(df_train_features.mean()) df_test_features = df_test_features.fillna(df_test_features.mean()) X_train = df_train_features[features_list].values X_test = df_test_features[features_list].values y_train = df_train_labels["total_cases"].values # Model: poisson_mod = Poisson(endog=y_train, exog=X_train).fit(maxiter=61) print(poisson_mod.summary()) predictions = poisson_mod.predict(X_test) predictions_rounded = np.rint(predictions).astype(np.int64) print(predictions_rounded) write_result(predictions_rounded, "/poisson.csv", sample_source=sample_submission_path, write_source=predictions_path)
def fit(self):
    # Scale X by its standard deviation only (with_mean=False — presumably
    # to stay sparse-safe; confirm against the caller's data type).
    self.scaler = StandardScaler(with_mean=False)
    self.X = self.scaler.fit_transform(self.X)

    # Fit a Poisson regression on the scaled design.
    self.poisson_model = Poisson(self.y, self.X).fit()