def test_poisson_residuals():
    # Compare Poisson GLM residuals between the `exposure` and the
    # `var_weights` parameterizations, which should agree exactly.
    nobs, k_exog = 100, 5
    np.random.seed(987125)

    x = np.random.randn(nobs, k_exog - 1)
    x = add_constant(x)

    y_true = x.sum(1) / 2
    y = y_true + 2 * np.random.randn(nobs)
    exposure = 1 + np.arange(nobs) // 4

    yp = np.random.poisson(np.exp(y_true) * exposure)
    # add outliers so the residual comparison is not trivial
    yp[10:15] += 10

    fam = sm.families.Poisson()
    mod_poi_e = GLM(yp, x, family=fam, exposure=exposure)
    res_poi_e = mod_poi_e.fit()
    # equivalent model: rate response with inverse-variance weights
    mod_poi_w = GLM(yp / exposure, x, family=fam, var_weights=exposure)
    res_poi_w = mod_poi_w.fit()
    assert_allclose(res_poi_e.resid_response / exposure,
                    res_poi_w.resid_response)
    assert_allclose(res_poi_e.resid_pearson, res_poi_w.resid_pearson)
    assert_allclose(res_poi_e.resid_deviance, res_poi_w.resid_deviance)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=FutureWarning)
        assert_allclose(res_poi_e.resid_anscombe, res_poi_w.resid_anscombe)
    # NOTE(review): compares e's *unscaled* Anscombe residuals against w's
    # scaled ones — equal for Poisson since scale is 1, but confirm this
    # asymmetry was intended and not a typo for resid_anscombe_unscaled.
    assert_allclose(res_poi_e.resid_anscombe_unscaled,
                    res_poi_w.resid_anscombe)
def test_poisson_residuals():
    # Compare Poisson GLM residuals between the `exposure` and the
    # `var_weights` parameterizations, which should agree exactly.
    nobs, k_exog = 100, 5
    np.random.seed(987125)

    x = np.random.randn(nobs, k_exog - 1)
    x = add_constant(x)

    y_true = x.sum(1) / 2
    y = y_true + 2 * np.random.randn(nobs)
    exposure = 1 + np.arange(nobs) // 4

    yp = np.random.poisson(np.exp(y_true) * exposure)
    # add outliers so the residual comparison is not trivial
    yp[10:15] += 10

    fam = sm.families.Poisson()
    mod_poi_e = GLM(yp, x, family=fam, exposure=exposure)
    res_poi_e = mod_poi_e.fit()
    # equivalent model: rate response with inverse-variance weights
    mod_poi_w = GLM(yp / exposure, x, family=fam, var_weights=exposure)
    res_poi_w = mod_poi_w.fit()
    assert_allclose(res_poi_e.resid_response / exposure,
                    res_poi_w.resid_response)
    assert_allclose(res_poi_e.resid_pearson, res_poi_w.resid_pearson)
    assert_allclose(res_poi_e.resid_deviance, res_poi_w.resid_deviance)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=FutureWarning)
        assert_allclose(res_poi_e.resid_anscombe, res_poi_w.resid_anscombe)
    # NOTE(review): compares e's *unscaled* Anscombe residuals against w's
    # scaled ones — equal for Poisson since scale is 1, but confirm this
    # asymmetry was intended and not a typo for resid_anscombe_unscaled.
    assert_allclose(res_poi_e.resid_anscombe_unscaled,
                    res_poi_w.resid_anscombe)
def test_glm(self):
    # Check that GLM.get_prediction on the sqrt-weight-transformed data
    # matches WLS.get_prediction under both t and normal distributions,
    # then exercise params_transform_univariate.
    # (preliminary, getting started with basic test for GLM.get_prediction)
    from statsmodels.genmod.generalized_linear_model import GLM

    res_wls = self.res_wls
    mod_wls = res_wls.model
    y, X, wi = mod_wls.endog, mod_wls.exog, mod_wls.weights

    w_sqrt = np.sqrt(wi)  # notation wi is weights, `w` is var
    # WLS is equivalent to a Gaussian GLM on the sqrt-weighted data
    mod_glm = GLM(y * w_sqrt, X * w_sqrt[:,None])

    # compare using t distribution
    res_glm = mod_glm.fit(use_t=True)
    pred_glm = res_glm.get_prediction()
    sf_glm = pred_glm.summary_frame()

    pred_res_wls = res_wls.get_prediction()
    sf_wls = pred_res_wls.summary_frame()

    n_compare = 30  # in glm with predict wendog
    assert_allclose(sf_glm.values[:n_compare],
                    sf_wls.values[:n_compare, :4])

    # compare using normal distribution
    res_glm = mod_glm.fit()  # default use_t=False
    pred_glm = res_glm.get_prediction()
    sf_glm = pred_glm.summary_frame()

    res_wls = mod_wls.fit(use_t=False)
    pred_res_wls = res_wls.get_prediction()
    sf_wls = pred_res_wls.summary_frame()
    assert_allclose(sf_glm.values[:n_compare],
                    sf_wls.values[:n_compare, :4])

    # function for parameter transformation
    # should be separate test method
    from statsmodels.genmod._prediction import params_transform_univariate

    rates = params_transform_univariate(res_glm.params,
                                        res_glm.cov_params())

    # manual delta-method counterpart: exp transform of params/bse/CI
    rates2 = np.column_stack((np.exp(res_glm.params),
                              res_glm.bse * np.exp(res_glm.params),
                              np.exp(res_glm.conf_int())))
    assert_allclose(rates.summary_frame().values, rates2, rtol=1e-13)

    from statsmodels.genmod.families import links

    # with identity transform the statistics must be unchanged
    pt = params_transform_univariate(res_glm.params, res_glm.cov_params(),
                                     link=links.identity())

    assert_allclose(pt.tvalues, res_glm.tvalues, rtol=1e-13)
    assert_allclose(pt.se_mean, res_glm.bse, rtol=1e-13)
    ptt = pt.t_test()
    assert_allclose(ptt[0], res_glm.tvalues, rtol=1e-13)
    assert_allclose(ptt[1], res_glm.pvalues, rtol=1e-13)

    # prediction with exog and no weights does not error
    res_glm = mod_glm.fit()
    pred_glm = res_glm.get_prediction(X)
def setup_class(cls):
    """Fit a CDFLink-Gaussian GLM and a discrete Probit model with
    cluster-robust covariances on a binarized endog for comparison."""
    cls.cov_type = 'cluster'
    binary_endog = (endog > endog.mean()).astype(int)
    glm_mod = GLM(binary_endog, exog,
                  family=families.Gaussian(link=links.CDFLink()))
    cls.res1 = glm_mod.fit(cov_type='cluster',
                           cov_kwds={'groups': group})
    probit_mod = smd.Probit(binary_endog, exog)
    cls.res2 = probit_mod.fit(cov_type='cluster',
                              cov_kwds={'groups': group})
def test_glm(self):
    # Check that GLM.get_prediction on the sqrt-weight-transformed data
    # matches WLS.get_prediction under both t and normal distributions,
    # then exercise params_transform_univariate.
    # (preliminary, getting started with basic test for GLM.get_prediction)
    from statsmodels.genmod.generalized_linear_model import GLM

    res_wls = self.res_wls
    mod_wls = res_wls.model
    y, X, wi = mod_wls.endog, mod_wls.exog, mod_wls.weights

    w_sqrt = np.sqrt(wi)  # notation wi is weights, `w` is var
    # WLS is equivalent to a Gaussian GLM on the sqrt-weighted data
    mod_glm = GLM(y * w_sqrt, X * w_sqrt[:, None])

    # compare using t distribution
    res_glm = mod_glm.fit(use_t=True)
    pred_glm = res_glm.get_prediction()
    sf_glm = pred_glm.summary_frame()

    pred_res_wls = res_wls.get_prediction()
    sf_wls = pred_res_wls.summary_frame()

    n_compare = 30  # in glm with predict wendog
    assert_allclose(sf_glm.values[:n_compare],
                    sf_wls.values[:n_compare, :4])

    # compare using normal distribution
    res_glm = mod_glm.fit()  # default use_t=False
    pred_glm = res_glm.get_prediction()
    sf_glm = pred_glm.summary_frame()

    res_wls = mod_wls.fit(use_t=False)
    pred_res_wls = res_wls.get_prediction()
    sf_wls = pred_res_wls.summary_frame()
    assert_allclose(sf_glm.values[:n_compare],
                    sf_wls.values[:n_compare, :4])

    # function for parameter transformation
    # should be separate test method
    from statsmodels.genmod._prediction import params_transform_univariate

    rates = params_transform_univariate(res_glm.params,
                                        res_glm.cov_params())

    # manual delta-method counterpart: exp transform of params/bse/CI
    rates2 = np.column_stack(
        (np.exp(res_glm.params),
         res_glm.bse * np.exp(res_glm.params),
         np.exp(res_glm.conf_int())))
    assert_allclose(rates.summary_frame().values, rates2, rtol=1e-13)

    from statsmodels.genmod.families import links

    # with identity transform the statistics must be unchanged
    pt = params_transform_univariate(res_glm.params, res_glm.cov_params(),
                                     link=links.identity())

    assert_allclose(pt.tvalues, res_glm.tvalues, rtol=1e-13)
    assert_allclose(pt.se_mean, res_glm.bse, rtol=1e-13)
    ptt = pt.t_test()
    assert_allclose(ptt[0], res_glm.tvalues, rtol=1e-13)
    assert_allclose(ptt[1], res_glm.pvalues, rtol=1e-13)
def setup_class(cls):
    """Compare a Gaussian GLM with a CDFLink (probit-like) against the
    discrete Probit model, both under cluster-robust covariance."""
    cls.cov_type = 'cluster'
    endog_binary = (endog > endog.mean()).astype(int)
    model_glm = GLM(endog_binary, exog,
                    family=families.Gaussian(link=links.CDFLink()))
    cls.res1 = model_glm.fit(cov_type='cluster',
                             cov_kwds={'groups': group})
    model_probit = smd.Probit(endog_binary, exog)
    cls.res2 = model_probit.fit(cov_type='cluster',
                                cov_kwds={'groups': group})
def setup_class(cls):
    """Binomial GLM vs discrete Logit on a binarized endog, both with
    cluster-robust covariance."""
    cls.cov_type = 'cluster'
    binary_endog = (endog > endog.mean()).astype(int)
    glm_mod = GLM(binary_endog, exog, family=families.Binomial())
    cls.res1 = glm_mod.fit(cov_type='cluster',
                           cov_kwds={'groups': group})
    logit_mod = smd.Logit(binary_endog, exog)
    cls.res2 = logit_mod.fit(cov_type='cluster',
                             cov_kwds={'groups': group})
def setup_class(cls):
    """Logistic regression via GLM (Binomial family) vs discrete Logit,
    both fit with cluster-robust standard errors."""
    cls.cov_type = 'cluster'
    endog_binary = (endog > endog.mean()).astype(int)
    model_glm = GLM(endog_binary, exog, family=families.Binomial())
    cls.res1 = model_glm.fit(cov_type='cluster',
                             cov_kwds={'groups': group})
    model_logit = smd.Logit(endog_binary, exog)
    cls.res2 = model_logit.fit(cov_type='cluster',
                               cov_kwds={'groups': group})
def setup_class(cls):
    """Probit via GLM (Binomial family + probit link, Newton) vs discrete
    Probit, both with cluster-robust covariance."""
    cls.cov_type = 'cluster'
    cls.rtol = 1e-6  # looser tolerance for the GLM/Probit comparison
    binary_endog = (endog > endog.mean()).astype(int)
    glm_mod = GLM(binary_endog, exog,
                  family=families.Binomial(link=links.probit()))
    cls.res1 = glm_mod.fit(method='newton', cov_type='cluster',
                           cov_kwds={'groups': group})
    probit_mod = smd.Probit(binary_endog, exog)
    cls.res2 = probit_mod.fit(cov_type='cluster',
                              cov_kwds={'groups': group})
def setup_class(cls):
    """Poisson GLM vs discrete Poisson on simulated counts, both with
    HC0 robust covariance."""
    np.random.seed(987125643)  # not intentional seed
    count_endog = np.random.poisson(endog)
    cls.cov_type = 'HC0'
    glm_mod = GLM(count_endog, exog, family=families.Poisson())
    cls.res1 = glm_mod.fit(cov_type='HC0')
    discrete_mod = smd.Poisson(count_endog, exog)
    cls.res2 = discrete_mod.fit(cov_type='HC0')
    cls.res1.rtol = 1e-11
def setup_class(cls):
    """Gaussian GLM vs OLS with hac-groupsum covariance; time is passed
    as a pandas Series to guard against a regression of GH#3606."""
    cls.cov_type = 'hac-groupsum'
    # time index is just made up to have a test case
    time = np.tile(np.arange(7), 5)[:-1]
    kwds = {'time': pd.Series(time),  # check for #3606
            'maxlags': 2,
            'use_correction': 'hac',
            'df_correction': False}
    glm_mod = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='hac-groupsum', cov_kwds=kwds)
    cls.res1b = glm_mod.fit(cov_type='nw-groupsum', cov_kwds=kwds)
    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='hac-groupsum', cov_kwds=kwds)
def setup_class(cls):
    """Gaussian GLM vs OLS under hac-groupsum covariance; also checks the
    'nw-groupsum' alias gives the same result (res1b)."""
    cls.cov_type = 'hac-groupsum'
    # time index is just made up to have a test case
    time = np.tile(np.arange(7), 5)[:-1]
    kwds = {'time': time,
            'maxlags': 2,
            'use_correction': 'hac',
            'df_correction': False}
    glm_mod = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='hac-groupsum', cov_kwds=kwds)
    cls.res1b = glm_mod.fit(cov_type='nw-groupsum', cov_kwds=kwds)
    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='hac-groupsum', cov_kwds=kwds)
def test_basic(self):
    # Compare combine-effects results (res1) against R `meta` package
    # reference values (res2): effect estimates, homogeneity test,
    # confidence intervals, and the GLM equivalents of FE/RE estimates.
    res1 = self.res1
    res2 = self.res2
    assert_allclose(self.eff, res2.TE, rtol=1e-13)
    assert_allclose(self.var_eff, res2.seTE**2, rtol=1e-13)
    assert_allclose(res1.mean_effect_fe, res2.TE_fixed, rtol=1e-13)
    # R meta does not adjust sd FE for HKSJ
    assert_allclose(res1.sd_eff_w_fe, res2.seTE_fixed, rtol=1e-13)
    assert_allclose(res1.q, res2.Q, rtol=1e-13)
    assert_allclose(res1.tau2, res2.tau2, rtol=1e-10)
    assert_allclose(res1.mean_effect_re, res2.TE_random, rtol=1e-13)
    assert_allclose(res1.sd_eff_w_re_hksj, res2.seTE_random, rtol=1e-13)

    # homogeneity test: statistic, p-value, and degrees of freedom
    th = res1.test_homogeneity()
    q, pv = th
    df = th.df
    assert_allclose(q, res2.Q, rtol=1e-13)
    assert_allclose(pv, res2.pval_Q, rtol=1e-13)
    assert_allclose(df, res2.df_Q, rtol=1e-13)

    assert_allclose(res1.i2, res2.I2, rtol=1e-13)
    assert_allclose(res1.h2, res2.H**2, rtol=1e-13)

    ci = res1.conf_int(use_t=True)  # fe, re, fe_wls, re_wls
    # R meta does not adjust FE for HKSJ, still uses normal dist
    # assert_allclose(ci[0][0], res2.lower_fixed, atol=1e-10)
    # assert_allclose(ci[0][1], res2.upper_fixed, atol=1e-10)
    assert_allclose(ci[3][0], res2.lower_random, rtol=1e-13)
    assert_allclose(ci[3][1], res2.upper_random, rtol=1e-10)

    ci = res1.conf_int(use_t=False)  # fe, re, fe_wls, re_wls
    assert_allclose(ci[0][0], res2.lower_fixed, rtol=1e-13)
    assert_allclose(ci[0][1], res2.upper_fixed, rtol=1e-13)

    # fixed-effects estimate equals an intercept-only GLM with
    # inverse-variance weights
    weights = 1 / self.var_eff
    mod_glm = GLM(self.eff, np.ones(len(self.eff)), var_weights=weights)
    res_glm = mod_glm.fit()
    assert_allclose(res_glm.params, res2.TE_fixed, rtol=1e-13)

    # random-effects estimate uses tau2-inflated variances as weights
    weights = 1 / (self.var_eff + res1.tau2)
    mod_glm = GLM(self.eff, np.ones(len(self.eff)), var_weights=weights)
    res_glm = mod_glm.fit()
    assert_allclose(res_glm.params, res2.TE_random, rtol=1e-13)
def setup_class(cls):
    """Gaussian GLM vs OLS with hac-panel (uniform kernel) covariance;
    'nw-panel' alias checked via res1b."""
    cls.cov_type = 'hac-panel'
    # time index is just made up to have a test case
    time = np.tile(np.arange(7), 5)[:-1]
    kwds = {'time': time,
            'maxlags': 2,
            'kernel': sw.weights_uniform,
            'use_correction': 'hac',
            'df_correction': False}
    glm_mod = GLM(endog.copy(), exog.copy(), family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='hac-panel', cov_kwds=kwds)
    cls.res1b = glm_mod.fit(cov_type='nw-panel', cov_kwds=kwds)
    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='hac-panel', cov_kwds=kwds)
def init(cls):
    """Fit the unconstrained reference model (res2) and a GLM with the
    constrained column absorbed into the offset (res1)."""
    cls.res2 = cls.mod2.fit()
    offset = 0.5 * cls.exog[:, cls.idx_c].squeeze()
    mod = GLM(cls.endog, cls.exogc, offset=offset)
    # in-place rename so parameter labels match the full model
    mod.exog_names[:] = ['const', 'x2', 'x3', 'x4']
    cls.res1 = mod.fit()
    cls.idx_p_uc = np.arange(cls.exogc.shape[1])
def _initialize(cls):
    # Compare GLMPenalized (small-tau penalty) against an unpenalized
    # Binomial GLM restricted to the k_nonzero true columns; both models
    # use an offset and HC0 robust covariance.
    y, x = cls.y, cls.x
    offset = -0.25 * np.ones(len(y))  # also check offset
    cov_type = 'HC0'
    modp = GLM(y, x[:, :cls.k_nonzero], family=family.Binomial(),
               offset=offset)
    cls.res2 = modp.fit(cov_type=cov_type, method='newton',
                        maxiter=1000, disp=0)

    mod = GLMPenalized(y, x, family=family.Binomial(), offset=offset,
                       penal=cls.penalty)
    # NOTE(review): `*= 1` is a no-op, so pen_weight is NOT actually
    # lowered here despite the comment — confirm whether a factor < 1
    # was intended.
    mod.pen_weight *= 1  # lower than in other cases
    mod.penal.tau = 0.05
    cls.res1 = mod.fit(cov_type=cov_type, method='bfgs',
                       max_start_irls=0, maxiter=100, disp=0, trim=0.001)

    cls.exog_index = slice(None, cls.k_nonzero, None)
    cls.atol = 1e-3
    cls.k_params = cls.k_nonzero
def setup_class(cls):
    """Poisson GLM with freq_weights and cluster-robust covariance,
    compared against Stata reference results.

    Uses normalized freq_weights to emulate aweights. corr_fact undoes
    the small-sample cluster adjustment that the sandwich covariance
    computation does not yet apply for weighted nobs.
    """
    import warnings

    fweights = [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3]
    # faking aweights by using normalized freq_weights
    fweights = np.array(fweights)
    wsum = fweights.sum()
    nobs = len(cpunish_data.endog)
    aweights = fweights / wsum * nobs

    gid = np.arange(1, 17 + 1) // 2
    n_groups = len(np.unique(gid))

    # no wnobs yet in sandwich covariance calculation
    cls.corr_fact = 1 / np.sqrt(n_groups / (n_groups - 1))
    # np.sqrt((wsum - 1.) / wsum)
    cov_kwds = {'groups': gid, 'use_correction': False}
    # BUG FIX: `pytest.warns(None)` is deprecated and raises an error in
    # pytest >= 7; suppress the expected warnings explicitly instead.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mod = GLM(cpunish_data.endog, cpunish_data.exog,
                  family=sm.families.Poisson(), freq_weights=fweights)
        cls.res1 = mod.fit(cov_type='cluster', cov_kwds=cov_kwds)

    # compare with discrete, start close to save time
    # modd = discrete.Poisson(cpunish_data.endog, cpunish_data.exog)
    cls.res2 = res_stata.results_poisson_fweight_clu1
def _initialize(cls): y, x = cls.y, cls.x # adding 10 to avoid strict rtol at predicted values close to zero y = y + 10 cov_type = 'HC0' modp = GLM(y, x[:, :cls.k_nonzero], family=family.Gaussian()) cls.res2 = modp.fit(cov_type=cov_type, method='bfgs', maxiter=100, disp=0) weights = (np.arange(x.shape[1]) >= 4).astype(float) mod = GLMPenalized(y, x, family=family.Gaussian(), penal=smpen.L2ContraintsPenalty(weights=weights)) # make pen_weight large to force redundant to close to zero mod.pen_weight *= 500 cls.res1 = mod.fit(cov_type=cov_type, method='bfgs', maxiter=100, disp=0, trim=False) cls.exog_index = slice(None, cls.k_nonzero, None) cls.k_params = x.shape[1] cls.atol = 1e-5 cls.rtol = 1e-5
def init(cls):
    """Set up res2 (reference fit) and res1 (GLM with the constrained
    column's contribution moved into the offset)."""
    cls.res2 = cls.mod2.fit()
    constrained_part = 0.5 * cls.exog[:, cls.idx_c].squeeze()
    glm_mod = GLM(cls.endog, cls.exogc, offset=constrained_part)
    # rename in place so parameter labels line up with the full model
    glm_mod.exog_names[:] = ['const', 'x2', 'x3', 'x4']
    cls.res1 = glm_mod.fit()
    cls.idx_p_uc = np.arange(cls.exogc.shape[1])
def kfold_cv(self, d, formula, k):
    """Run k-fold cross-validation of a binomial GLM on dataframe `d`.

    Shuffles the rows, splits them into k contiguous folds, fits on the
    training portion and scores on the held-out fold each time.

    Returns
    -------
    tuple of (mean train accuracy, mean test accuracy)
    """
    # NOTE(review): `formula` is unused here — kept for interface
    # compatibility; confirm whether it should drive the design matrix.
    n = len(d)
    d = d.sample(n, replace=False)  # shuffle rows without replacement
    partition = n // k
    current, last = 0, partition
    train_accs = []
    test_accs = []
    while current < n:
        # absorb any remainder rows into the final fold
        if last > n - partition:
            last = n
        test = d.iloc[current:last]
        train = d.drop(test.index)
        y = train[[self.yvar]]
        X = self.select_from_design(train.columns).loc[train.index]
        yt = test[[self.yvar]]
        Xt = self.select_from_design(test.columns).loc[test.index]
        # only use design columns present in both folds
        shared = list(set(Xt.columns) & set(X.columns))
        glm = GLM(y, X[shared], family=sm.families.Binomial())
        try:
            res = glm.fit()
            train_acc = self._scores_to_accuracy(res, X[shared], y)
            test_acc = self._scores_to_accuracy(res, Xt[shared], yt)
            train_accs.append(train_acc)
            test_accs.append(test_acc)
        except PerfectSeparationError:
            # BUG FIX: Python 2 `print` statement is a syntax error on
            # Python 3 — converted to the print() function.
            print("Perfectly Separated!")
        current = last
        last += partition
    return np.mean(train_accs), np.mean(test_accs)
def ppglmfit(X, Y):
    '''Fit a Poisson point-process GLM, adding a constant column if needed.

    statsmodels' GLM expects the design matrix to already contain a
    constant column; this wrapper prepends a column of ones when the
    first column of X is not constant, then fits a Poisson GLM.

    Parameters
    ----------
    X: N_observations x N_features design matrix.
    Y: Binary point process observations

    Returns
    -------
    mu, B: the offset and parameter estimates for the GLM model.
    '''
    if np.mean(Y) > 0.1:
        print('Caution: spike rate very high, is Poisson assumption valid?')
    if np.sum(Y) < 100:
        print('Caution: fewer than 100 spikes to fit model')
    # prepend a constant column when the first column is not constant
    if not all(X[:, 0] == X[0, 0]):
        constant = np.ones((X.shape[0], 1), dtype=X.dtype)
        X = np.hstack([constant, X])
    fitted = GLM(Y, X, family=Poisson()).fit()
    params = fitted.params
    # first element is the constant-term offset, the rest are weights
    return params[0], params[1:]
def test_cov_params():
    # Check that GLMGam cov_params agrees with a plain GLM on the spline
    # basis when the penalization weight alpha is zero or near-zero.
    np.random.seed(0)
    n = 1000
    x = np.random.uniform(0, 1, (n, 2))
    x = x - x.mean()
    y = x[:, 0] * x[:, 0] + np.random.normal(0, .01, n)
    y -= y.mean()

    bsplines = BSplines(x, degree=[3] * 2, df=[10] * 2,
                        constraints='center')
    alpha = [0, 0]
    glm_gam = GLMGam(y, smoother=bsplines, alpha=alpha)
    res_glm_gam = glm_gam.fit(method='pirls', max_start_irls=0,
                              disp=0, maxiter=5000)
    glm = GLM(y, bsplines.basis)
    res_glm = glm.fit()

    # rtol is loose because pirls stops on its own convergence criterion
    assert_allclose(res_glm.cov_params(), res_glm_gam.cov_params(),
                    rtol=0.0025)

    # a tiny nonzero alpha should give essentially the same covariance
    alpha = 1e-13
    glm_gam = GLMGam(y, smoother=bsplines, alpha=alpha)
    res_glm_gam = glm_gam.fit(method='pirls', max_start_irls=0,
                              disp=0, maxiter=5000)
    assert_allclose(res_glm.cov_params(), res_glm_gam.cov_params(),
                    atol=1e-10)

    # same check with the bfgs optimizer
    res_glm_gam = glm_gam.fit(method='bfgs', max_start_irls=0,
                              disp=0, maxiter=5000, maxfun=5000)
    assert_allclose(res_glm.cov_params(), res_glm_gam.cov_params(),
                    rtol=1e-4, atol=1e-8)
def ppglmfit(X,Y): ''' The GLM solver in statsmodels is very general. It accepts any link function and expects that, if you want a constant term in your model, that you have already manually added a column of ones to your design matrix. This wrapper simplifies using GLM to fit the common case of a Poisson point-process model, where the constant term has not been explicitly added to the design matrix Args: X: N_observations x N_features design matrix. Y: Binary point process observations Returns: ÎĽ, B: the offset and parameter estimates for the GLM model. ''' # add constant value to X, if the 1st column is not constant if mean(Y)>0.1: print('Caution: spike rate very high, is Poisson assumption valid?') if sum(Y)<100: print('Caution: fewer than 100 spikes to fit model') if not all(X[:,0]==X[0,0]): X = hstack([ ones((shape(X)[0],1),dtype=X.dtype), X]) poisson_model = GLM(Y,X,family=Poisson()) poisson_results = poisson_model.fit() M = poisson_results.params return M[0],M[1:]
def setup_class(cls):
    """Gaussian GLM vs OLS with hac-panel (uniform kernel) covariance."""
    import statsmodels.stats.sandwich_covariance as sw

    cls.cov_type = 'hac-panel'
    # time index is just made up to have a test case
    time = np.tile(np.arange(7), 5)[:-1]
    kwds = {'time': time,
            'maxlags': 2,
            'kernel': sw.weights_uniform,
            'use_correction': 'hac',
            'df_correction': False}
    glm_mod = GLM(endog.copy(), exog.copy(), family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='hac-panel', cov_kwds=kwds)
    cls.res1b = glm_mod.fit(cov_type='nw-panel', cov_kwds=kwds)
    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='hac-panel', cov_kwds=kwds)
def test_cov_params():
    # Check that GLMGam cov_params agrees with a plain GLM on the spline
    # basis when the penalization weight alpha is zero or near-zero.
    np.random.seed(0)
    n = 1000
    x = np.random.uniform(0, 1, (n, 2))
    x = x - x.mean()
    y = x[:, 0] * x[:, 0] + np.random.normal(0, .01, n)
    y -= y.mean()

    bsplines = BSplines(x, degree=[3] * 2, df=[10] * 2,
                        constraints='center')
    alpha = [0, 0]
    glm_gam = GLMGam(y, smoother=bsplines, alpha=alpha)
    res_glm_gam = glm_gam.fit(method='pirls', max_start_irls=0,
                              disp=0, maxiter=5000)
    glm = GLM(y, bsplines.basis)
    res_glm = glm.fit()

    # rtol is loose because pirls stops on its own convergence criterion
    assert_allclose(res_glm.cov_params(), res_glm_gam.cov_params(),
                    rtol=0.0025)

    # a tiny nonzero alpha should give essentially the same covariance
    alpha = 1e-13
    glm_gam = GLMGam(y, smoother=bsplines, alpha=alpha)
    res_glm_gam = glm_gam.fit(method='pirls', max_start_irls=0,
                              disp=0, maxiter=5000)
    assert_allclose(res_glm.cov_params(), res_glm_gam.cov_params(),
                    atol=1e-10)

    # same check with the bfgs optimizer
    res_glm_gam = glm_gam.fit(method='bfgs', max_start_irls=0,
                              disp=0, maxiter=5000, maxfun=5000)
    assert_allclose(res_glm.cov_params(), res_glm_gam.cov_params(),
                    rtol=1e-4, atol=1e-8)
def fit_scores(self, balance=True, nmodels=None, k=3):
    """Fit propensity-score model(s).

    When `balance` is True, fits `nmodels` logistic GLMs on balanced
    majority/minority samples (default count derived from the imbalance
    ratio, rounded up to the nearest ten); otherwise fits a single model
    on all the data. Accuracy of each fit is recorded in
    self.model_accurracy and the results in self.models.
    """
    # BUG FIX: Python 2 `print` statements are syntax errors on Python 3;
    # converted throughout to the print() function (same output).
    if not self.formula:
        # use all columns in the model (untransformed)
        self.formula = '{} ~ {}'.format(self.yvar, '+'.join(self.xvars))
    if self.stepwise:
        print("Optimizing Forumla via forward stepwise selection...")
        # use all columns + trasnformed columns in model
        self.formula, self.swdata = \
            self.forward_stepwise(self.balanced_sample(), self.yvar, k=k)
    if balance:
        if nmodels is None:
            # fit multiple models based on imbalance severity (rounded up
            # to nearest tenth)
            minor, major = [
                self.data[self.data[self.yvar] == i]
                for i in (self.minority, self.majority)
            ]
            nmodels = int(np.ceil((len(major) / len(minor)) / 10) * 10)
        self.nmodels = nmodels
        for i in range(nmodels):
            progress(
                i + 1, nmodels,
                prestr="Fitting {} Models on Balanced Samples...".format(
                    nmodels))

            # sample from majority to create balance dataset
            df = self.balanced_sample()
            y_samp, X_samp = patsy.dmatrices(self.formula, data=df,
                                             return_type='dataframe')
            glm = GLM(y_samp, X_samp, family=sm.families.Binomial())
            res = glm.fit()
            self.model_accurracy.append(
                self._scores_to_accuracy(res, X_samp, y_samp))
            self.models.append(res)
        print("\nAverage Accuracy:",
              "{}%".format(round(np.mean(self.model_accurracy) * 100, 2)))
    else:
        # ignore any imbalance and fit one model
        self.nmodels = 1
        print('\nFitting 1 (Unbalanced) Model...')
        glm = GLM(self.y, self.X, family=sm.families.Binomial())
        res = glm.fit()
        self.model_accurracy.append(
            self._scores_to_accuracy(res, self.X, self.y))
        self.models.append(res)
        print("Accuracy", round(np.mean(self.model_accurracy[0]) * 100, 2))
def setup_class(cls):
    """Gaussian GLM vs OLS, both with HC0 robust covariance."""
    cls.cov_type = 'HC0'
    glm_mod = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='HC0')
    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='HC0')
def setup_class(cls):
    """Gaussian GLM vs OLS, both with cluster-robust covariance."""
    cls.cov_type = 'cluster'
    glm_mod = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='cluster',
                           cov_kwds={'groups': group})
    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='cluster',
                           cov_kwds={'groups': group})
def setup_class(cls):
    """Compare Gaussian GLM against OLS under HC0 robust covariance."""
    cls.cov_type = 'HC0'
    model_glm = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = model_glm.fit(cov_type='HC0')
    model_ols = OLS(endog, exog)
    cls.res2 = model_ols.fit(cov_type='HC0')
def setup_class(cls):
    """Compare Gaussian GLM against OLS under cluster-robust covariance."""
    cls.cov_type = 'cluster'
    model_glm = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = model_glm.fit(cov_type='cluster',
                             cov_kwds={'groups': group})
    model_ols = OLS(endog, exog)
    cls.res2 = model_ols.fit(cov_type='cluster',
                             cov_kwds={'groups': group})
def setup_class(cls):
    """Poisson GLM with HC1 covariance vs Stata reference results;
    correction factor comes from the shared helper."""
    cls.res2 = results_st.results_poisson_hc1
    poisson_mod = GLM(endog, exog, family=families.Poisson())
    fit_res = poisson_mod.fit(cov_type='HC1')
    cls.res1 = fit_res
    cls.bse_rob = fit_res.bse
    cls.corr_fact = cls.get_correction_factor(fit_res, sub_kparams=False)
def init(cls):
    """Fit the reference model and a var_weights GLM with the constrained
    column absorbed into the offset, both with HC0 covariance."""
    cov_type = 'HC0'
    cls.res2 = cls.mod2.fit(cov_type=cov_type)
    offset = 0.5 * cls.exog[:, cls.idx_c].squeeze()
    mod = GLM(cls.endog, cls.exogc, offset=offset,
              var_weights=cls.aweights)
    # in-place rename so parameter labels match the full model
    mod.exog_names[:] = ['const', 'x2', 'x3', 'x4']
    cls.res1 = mod.fit(cov_type=cov_type)
    cls.idx_p_uc = np.arange(cls.exogc.shape[1])
def fit_poisson(X, Y):
    """
    Fits the Poisson regression model with the training data

    :param X: the feature matrix
    :param Y: the label matrix
    :return: the fitted Poisson model (instance of
        statsmodels.genmod.generalized_linear_model.GLMResults)
    """
    # NOTE(review): `sum(..., axis=1)` requires numpy's sum — presumably
    # `from numpy import *` (or similar) at module level, since the
    # builtin sum() has no `axis` argument. Confirm the file's imports.
    # row totals of the label matrix form the Poisson response
    t = sum(Y, axis=1)
    pr = GLM(t, X, family=Poisson())
    return pr.fit()
def init(cls):
    """Set up res2 (reference, HC0) and res1 (GLM with var_weights and
    the constrained column moved into the offset, HC0)."""
    cov_type = 'HC0'
    cls.res2 = cls.mod2.fit(cov_type=cov_type)
    constrained_part = 0.5 * cls.exog[:, cls.idx_c].squeeze()
    glm_mod = GLM(cls.endog, cls.exogc, offset=constrained_part,
                  var_weights=cls.aweights)
    # rename in place so parameter labels line up with the full model
    glm_mod.exog_names[:] = ['const', 'x2', 'x3', 'x4']
    cls.res1 = glm_mod.fit(cov_type=cov_type)
    cls.idx_p_uc = np.arange(cls.exogc.shape[1])
def setup_class(cls):
    """Poisson GLM with HC1 covariance vs Stata reference; corr_fact is
    the sqrt of the inverse small-sample adjustment nobs/(nobs-1)."""
    cls.res2 = results_st.results_poisson_hc1
    poisson_mod = GLM(endog, exog, family=families.Poisson())
    cls.res1 = poisson_mod.fit(cov_type='HC1')
    cls.bse_rob = cls.res1.bse

    nobs, k_vars = poisson_mod.exog.shape
    adjustment = (nobs) / float(nobs - 1.)
    # for bse we need sqrt of correction factor
    cls.corr_fact = np.sqrt(1. / adjustment)
def setup_class(cls):
    """Gaussian GLM vs OLS with HAC (maxlags=2) covariance."""
    cls.cov_type = 'HAC'
    hac_kwds = {'maxlags': 2}
    glm_mod = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='HAC', cov_kwds=hac_kwds)
    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='HAC', cov_kwds=hac_kwds)
def setup_class(cls):
    """Compare Gaussian GLM against OLS under HAC (maxlags=2)."""
    cls.cov_type = 'HAC'
    hac_kwds = {'maxlags': 2}
    model_glm = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = model_glm.fit(cov_type='HAC', cov_kwds=hac_kwds)
    model_ols = OLS(endog, exog)
    cls.res2 = model_ols.fit(cov_type='HAC', cov_kwds=hac_kwds)
def setup_class(cls):
    # Fit without robust covariance, then convert the results to HC1
    # in place: use_self=True makes get_robustcov_results mutate res1
    # rather than return a new results instance.
    cls.res2 = results_st.results_poisson_hc1
    mod = GLM(endog, exog, family=families.Poisson())
    cls.res1 = mod.fit()

    #res_hc0_ = cls.res1.get_robustcov_results('HC1')
    get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
    cls.bse_rob = cls.res1.bse
    cls.corr_fact = cls.get_correction_factor(cls.res1, sub_kparams=False)
def setup_class(cls):
    """Poisson GLM fit directly with HC1 vs Stata reference results;
    corr_fact undoes the nobs/(nobs-1) small-sample adjustment (sqrt
    scale for standard errors)."""
    cls.res2 = results_st.results_poisson_hc1
    model = GLM(endog, exog, family=families.Poisson())
    cls.res1 = model.fit(cov_type='HC1')
    cls.bse_rob = cls.res1.bse

    nobs, k_vars = model.exog.shape
    small_sample_adj = (nobs) / float(nobs - 1.)
    # for bse we need sqrt of correction factor
    cls.corr_fact = np.sqrt(1. / small_sample_adj)
def _initialize(cls):
    """With pen_weight set to 0, GLMPenalized must reproduce the plain
    Poisson GLM estimates."""
    y, x = cls.y, cls.x
    reference = GLM(y, x, family=family.Poisson())
    cls.res2 = reference.fit()
    penalized = GLMPenalized(y, x, family=family.Poisson(),
                             penal=cls.penalty)
    penalized.pen_weight = 0  # disable the penalty entirely
    cls.res1 = penalized.fit(method='bfgs', maxiter=100, disp=0)
    cls.atol = 5e-6
def _initialize(cls):
    """Zero penalty weight: the penalized Poisson fit must agree with the
    unpenalized GLM reference."""
    y, x = cls.y, cls.x
    glm_ref = GLM(y, x, family=family.Poisson())
    cls.res2 = glm_ref.fit()
    pen_mod = GLMPenalized(y, x, family=family.Poisson(),
                           penal=cls.penalty)
    pen_mod.pen_weight = 0  # turn the penalty off
    cls.res1 = pen_mod.fit(method='bfgs', maxiter=100, disp=0)
    cls.atol = 5e-6
def setup_class(cls):
    """Influence measures: Binomial GLM (Newton) vs discrete Logit on the
    same design, both fit to tight tolerance."""
    from statsmodels.discrete.discrete_model import Logit

    df = data_bin
    design = df[['const', 'log_rate', 'log_volumne']]
    glm_mod = GLM(df['constrict'], design, family=families.Binomial())
    glm_res = glm_mod.fit(method="newton", tol=1e-10)

    logit_mod = Logit(df['constrict'], design)
    logit_res = logit_mod.fit(method="newton", tol=1e-10)

    cls.infl1 = glm_res.get_influence()
    cls.infl0 = logit_res.get_influence()
def setup_class(cls):
    """HAC covariance with kernel given as the string 'bartlett' for GLM,
    compared to OLS with the default kernel."""
    cls.cov_type = 'HAC'

    # check kernel specified as string
    string_kernel_kwds = {'kernel': 'bartlett', 'maxlags': 2}
    glm_mod = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='HAC', cov_kwds=string_kernel_kwds)

    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='HAC', cov_kwds={'maxlags': 2})
def setup_class(cls):
    """HAC covariance: GLM with a callable uniform kernel vs OLS with the
    same kernel specified by name ('uniform')."""
    cls.cov_type = 'HAC'

    kwds = {'kernel': sw.weights_uniform, 'maxlags': 2}
    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

    # check kernel as string
    mod2 = OLS(endog, exog)
    kwds2 = {'kernel': 'uniform', 'maxlags': 2}
    # BUG FIX: kwds2 was defined but `kwds` was passed, so the
    # string-kernel code path was never exercised; pass kwds2 as intended.
    cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds2)
def setup_class(cls):
    # Fit without robust covariance, then convert the results to HC1 in
    # place: use_self=True makes get_robustcov_results mutate res1 rather
    # than return a new results instance.
    cls.res2 = results_st.results_poisson_hc1
    mod = GLM(endog, exog, family=families.Poisson())
    cls.res1 = mod.fit()

    #res_hc0_ = cls.res1.get_robustcov_results('HC1')
    get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
    cls.bse_rob = cls.res1.bse

    nobs, k_vars = mod.exog.shape
    # small-sample adjustment nobs / (nobs - 1)
    corr_fact = (nobs) / float(nobs - 1.)
    # for bse we need sqrt of correction factor
    cls.corr_fact = np.sqrt(1. / corr_fact)
def setup_class(cls):
    # Fit without robust covariance, then convert the results to HC1 in
    # place: use_self=True makes get_robustcov_results mutate res1 rather
    # than return a new results instance.
    cls.res2 = results_st.results_poisson_hc1
    mod = GLM(endog, exog, family=families.Poisson())
    cls.res1 = mod.fit()

    #res_hc0_ = cls.res1.get_robustcov_results('HC1')
    get_robustcov_results(cls.res1._results, 'HC1', use_self=True)
    cls.bse_rob = cls.res1.bse

    nobs, k_vars = mod.exog.shape
    # small-sample adjustment nobs / (nobs - 1)
    corr_fact = (nobs) / float(nobs - 1.)
    # for bse we need sqrt of correction factor
    cls.corr_fact = np.sqrt(1./corr_fact)
def setup_class(cls):
    """HAC covariance: GLM with the uniform kernel passed as a callable,
    OLS with the same kernel passed as the string 'uniform'."""
    cls.cov_type = 'HAC'

    kwds = {'kernel': sw.weights_uniform, 'maxlags': 2}
    mod1 = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = mod1.fit(cov_type='HAC', cov_kwds=kwds)

    # check kernel as string
    mod2 = OLS(endog, exog)
    kwds2 = {'kernel': 'uniform', 'maxlags': 2}
    # BUG FIX: kwds2 was defined but `kwds` was passed, so the
    # string-kernel code path was never exercised; pass kwds2 as intended.
    cls.res2 = mod2.fit(cov_type='HAC', cov_kwds=kwds2)
def setup_class(cls):
    # Influence measures: GLM with Poisson family (attach_wls=True keeps
    # the internal WLS results that get_influence needs) vs the discrete
    # Poisson model on the same data.
    # NOTE(review): endog 'constrict' appears to be binary while the
    # family is Poisson — presumably intentional, to compare the two
    # Poisson implementations on identical data; confirm.
    df = data_bin
    mod = GLM(df['constrict'], df[['const', 'log_rate', 'log_volumne']],
              family=families.Poisson())
    res = mod.fit(attach_wls=True, atol=1e-10)

    from statsmodels.discrete.discrete_model import Poisson
    mod2 = Poisson(df['constrict'],
                   df[['const', 'log_rate', 'log_volumne']])
    res2 = mod2.fit(tol=1e-10)

    cls.infl0 = res.get_influence()
    cls.infl1 = res2.get_influence()
def setup_class(cls):
    """HAC covariance with a callable uniform kernel, GLM vs OLS; res3
    keeps a default-kernel OLS fit around for debugging."""
    cls.cov_type = 'HAC'

    kernel_kwds = {'kernel': sw.weights_uniform, 'maxlags': 2}
    glm_mod = GLM(endog, exog, family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='HAC', cov_kwds=kernel_kwds)

    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='HAC', cov_kwds=kernel_kwds)

    # for debugging
    cls.res3 = ols_mod.fit(cov_type='HAC', cov_kwds={'maxlags': 2})
def _initialize(cls):
    """Penalized Binomial GLM should recover the oracle model fit on the
    k_nonzero true columns."""
    y, x = cls.y, cls.x
    oracle = GLM(y, x[:, :cls.k_nonzero], family=family.Binomial())
    cls.res2 = oracle.fit(disp=0)
    penalized = GLMPenalized(y, x, family=family.Binomial(),
                             penal=cls.penalty)
    penalized.pen_weight *= .5
    penalized.penal.tau = 0.05
    cls.res1 = penalized.fit(method='bfgs', maxiter=100, disp=0)
    cls.exog_index = slice(None, cls.k_nonzero, None)
    cls.atol = 5e-3
def _initialize(cls):
    """Penalized Poisson GLM vs the oracle GLM restricted to the true
    nonzero columns."""
    y, x = cls.y, cls.x
    oracle = GLM(y, x[:, :cls.k_nonzero], family=family.Poisson())
    cls.res2 = oracle.fit()
    penalized = GLMPenalized(y, x, family=family.Poisson(),
                             penal=cls.penalty)
    penalized.pen_weight *= 1.5  # same as discrete Poisson
    penalized.penal.tau = 0.05
    cls.res1 = penalized.fit(method='bfgs', maxiter=100)
    cls.exog_index = slice(None, cls.k_nonzero, None)
    cls.atol = 5e-3
def setup_class(cls):
    # Fit without robust covariance, then convert the results to
    # cluster-robust in place: use_self=True makes get_robustcov_results
    # mutate res1 rather than return a new results instance.
    cls.res2 = results_st.results_poisson_clu
    mod = GLM(endog, exog, family=families.Poisson())
    cls.res1 = res1 = mod.fit()

    get_robustcov_results(cls.res1._results, 'cluster',
                          groups=group,
                          use_correction=True,
                          df_correction=True,  #TODO has no effect
                          use_t=False, #True,
                          use_self=True)
    cls.bse_rob = cls.res1.bse
    cls.corr_fact = cls.get_correction_factor(cls.res1)
def setup_class(cls):
    """hac-panel covariance with groups given as a pandas Series
    (regression test for GH#3606), GLM vs OLS."""
    cls.cov_type = 'hac-panel'
    # time index is just made up to have a test case
    groups = np.repeat(np.arange(5), 7)[:-1]
    kwds = {'groups': pd.Series(groups),  # check for #3606
            'maxlags': 2,
            'kernel': sw.weights_uniform,
            'use_correction': 'hac',
            'df_correction': False}
    glm_mod = GLM(endog.copy(), exog.copy(), family=families.Gaussian())
    cls.res1 = glm_mod.fit(cov_type='hac-panel', cov_kwds=kwds)
    ols_mod = OLS(endog, exog)
    cls.res2 = ols_mod.fit(cov_type='hac-panel', cov_kwds=kwds)
def _initialize(cls):
    """With pen_weight 0, the penalized Binomial GLM (with offset) must
    match the plain GLM fit; starts near the reference solution."""
    y, x = cls.y, cls.x
    x = x[:, :4]
    offset = -0.25 * np.ones(len(y))  # also check offset
    reference = GLM(y, x, family=family.Binomial(), offset=offset)
    cls.res2 = reference.fit(method='bfgs', max_start_irls=100)
    penalized = GLMPenalized(y, x, family=family.Binomial(),
                             offset=offset, penal=cls.penalty)
    penalized.pen_weight = 0  # disable the penalty entirely
    cls.res1 = penalized.fit(method='bfgs', max_start_irls=3,
                             maxiter=100, disp=0,
                             start_params=cls.res2.params * 0.9)
    cls.atol = 1e-10
    cls.k_params = 4
def setup_class(cls):
    """Poisson GLM with freq_weights and HC0 covariance, compared to the
    Stata HC1 reference; corr_fact converts between the two."""
    fweights = np.array(
        [1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3])
    # faking aweights by using normalized freq_weights
    wsum = fweights.sum()
    nobs = len(cpunish_data.endog)
    aweights = fweights / wsum * nobs

    cls.corr_fact = np.sqrt((wsum - 1.) / wsum)
    mod = GLM(cpunish_data.endog, cpunish_data.exog,
              family=sm.families.Poisson(), freq_weights=fweights)
    cls.res1 = mod.fit(cov_type='HC0')
    # , cov_kwds={'use_correction':False})

    # compare with discrete, start close to save time
    # modd = discrete.Poisson(cpunish_data.endog, cpunish_data.exog)
    cls.res2 = res_stata.results_poisson_fweight_hc1
def setup_class(cls):
    """Cluster-robust Poisson GLM vs Stata reference; corr_fact is the
    sqrt of the df correction (nobs-1)/(nobs-k_params) applied to bse."""
    cls.res2 = results_st.results_poisson_clu
    mod = GLM(endog, exog, family=families.Poisson())
    cluster_kwds = dict(groups=group,
                        use_correction=True,
                        df_correction=True)  # TODO has no effect
    cls.res1 = res1 = mod.fit(cov_type='cluster',
                              cov_kwds=cluster_kwds,
                              use_t=False)  # True
    cls.bse_rob = cls.res1.bse

    nobs, k_vars = mod.exog.shape
    k_params = len(cls.res1.params)
    # n_groups = len(np.unique(group))
    df_corr = (nobs - 1.) / float(nobs - k_params)
    # for bse we need sqrt of correction factor
    cls.corr_fact = np.sqrt(df_corr)
def setup_class(cls):
    # Cluster-robust Poisson GLM vs Stata reference; additionally checks
    # that results (t_test, ...) still work after normalized_cov_params
    # is set to None (regression test for GH#2209).
    cls.res2 = results_st.results_poisson_clu
    mod = GLM(endog, exog, family=families.Poisson())
    cls.res1 = res1 = mod.fit(cov_type='cluster',
                              cov_kwds=dict(groups=group,
                                            use_correction=True,
                                            df_correction=True),  #TODO has no effect
                              use_t=False, #True,
                              )

    # The model results, t_test, ... should also work without
    # normalized_cov_params, see #2209
    # Note: we cannot set on the wrapper res1, we need res1._results
    cls.res1._results.normalized_cov_params = None

    cls.bse_rob = cls.res1.bse
    cls.corr_fact = cls.get_correction_factor(cls.res1)