def _initialize(cls):
    """Fit the oracle GLM and the L2-constrained GLMPenalized for comparison.

    Stores the reference fit as ``cls.res2`` and the penalized fit as
    ``cls.res1`` together with the tolerances used by the comparison tests.
    """
    endog, exog = cls.y, cls.x
    # shift endog by 10 to avoid strict rtol at predicted values close to zero
    endog = endog + 10
    cov = 'HC0'

    # unpenalized reference model uses only the truly nonzero columns
    mod_ref = GLM(endog, exog[:, :cls.k_nonzero], family=family.Gaussian())
    cls.res2 = mod_ref.fit(cov_type=cov, method='bfgs', maxiter=100, disp=0)

    # penalize only columns with index >= 4 (weight 1 there, 0 elsewhere)
    pen_weights = (np.arange(exog.shape[1]) >= 4).astype(float)
    penalty = smpen.L2ContraintsPenalty(weights=pen_weights)
    mod_pen = GLMPenalized(endog, exog, family=family.Gaussian(),
                           penal=penalty)
    # make pen_weight large to force the redundant coefficients close to zero
    mod_pen.pen_weight *= 500
    cls.res1 = mod_pen.fit(cov_type=cov, method='bfgs', maxiter=100,
                           disp=0, trim=False)

    cls.exog_index = slice(None, cls.k_nonzero, None)
    cls.k_params = exog.shape[1]
    cls.atol = 1e-5
    cls.rtol = 1e-5
def _initialize(cls): y, x = cls.y, cls.x # adding 10 to avoid strict rtol at predicted values close to zero y = y + 10 k = x.shape[1] cov_type = 'HC0' restriction = np.eye(k)[2:] modp = TheilGLS(y, x, r_matrix=restriction) # the corresponding Theil penweight seems to be 2 * nobs / sigma2_e cls.res2 = modp.fit(pen_weight=120.74564413221599 * 1000, use_t=False) pen = smpen.L2ContraintsPenalty(restriction=restriction) mod = GLMPenalized(y, x, family=family.Gaussian(), penal=pen) # use default weight for GLMPenalized mod.pen_weight *= 1 cls.res1 = mod.fit(cov_type=cov_type, method='bfgs', maxiter=100, disp=0, trim=False) cls.k_nonzero = k cls.exog_index = slice(None, cls.k_nonzero, None) cls.k_params = x.shape[1] cls.atol = 1e-5 cls.rtol = 1e-5
def setup_class(cls): super(_estGAMGaussianLogLink, cls).setup_class() #initialize DGP cls.family = family.Gaussian(links.log) cls.rvs = stats.norm.rvs cls.scale = 5 cls.init()
def __init__(self):
    """Configure the data generating process: Gaussian family with log link."""
    # BUGFIX: super(self.__class__, self).__init__() recurses infinitely as
    # soon as this class is subclassed, because self.__class__ is then the
    # subclass and the lookup never advances up the MRO.  Zero-argument
    # super() resolves relative to the class that defines this method.
    super().__init__()
    # initialize DGP
    self.family = family.Gaussian(links.log)
    self.rvs = stats.norm.rvs
    self.scale = 5
    self.init()
def test_glmgaussian_screening():
    """Check VariableScreening recovers the true nonzero variables (Gaussian).

    Compares the screened final parameters against an oracle fit that uses
    only the truly nonzero columns, for different numbers of kept columns.
    """
    y, x, idx_nonzero_true, beta = _get_gaussian_data()
    nobs = len(y)
    # demeaning makes constant zero, checks that exog_keep are not trimmed
    y = y - y.mean(0)
    # keyword options forwarded to VariableScreening below
    screener_kwds = dict(pen_weight=nobs * 0.75, threshold_trim=1e-3,
                         ranking_attr='model.score_factor')
    # '%4d' pads indices to width 4, matching the naming used after screening
    xnames_true = ['var%4d' % ii for ii in idx_nonzero_true]
    xnames_true[0] = 'const'
    parameters = pd.DataFrame(beta[idx_nonzero_true], index=xnames_true,
                              columns=['true'])
    xframe_true = pd.DataFrame(x[:, idx_nonzero_true], columns=xnames_true)
    # oracle fit: penalized GLM on exactly the true nonzero columns
    res_oracle = GLMPenalized(y, xframe_true, family=family.Gaussian()).fit()
    parameters['oracle'] = res_oracle.params

    for k_keep in [1, 2]:
        # initial model keeps the first k_keep columns; the rest are candidates
        mod_initial = GLMPenalized(y, x[:, :k_keep], family=family.Gaussian())
        screener = VariableScreening(mod_initial, **screener_kwds)
        exog_candidates = x[:, k_keep:]
        res_screen = screener.screen_exog(exog_candidates, maxiter=30)

        assert_equal(np.sort(res_screen.idx_nonzero), idx_nonzero_true)

        xnames = ['var%4d' % ii for ii in res_screen.idx_nonzero]
        xnames[0] = 'const'
        # smoke test
        res_screen.results_final.summary(xname=xnames)
        res_screen.results_pen.summary()
        assert_equal(res_screen.results_final.mle_retvals['converged'], True)

        ps = pd.Series(res_screen.results_final.params, index=xnames,
                       name='final')
        parameters = parameters.join(ps, how='outer')
        assert_allclose(parameters['oracle'], parameters['final'], atol=1e-5)
        # we need to remove 'final' again for next iteration
        del parameters['final']
def _initialize(cls):
    """Fit the oracle GLM and a trimmed penalized GLM using ``cls.penalty``.

    The reference fit (``cls.res2``) uses only the nonzero columns; the
    penalized fit (``cls.res1``) trims coefficients driven to zero.
    """
    exog = cls.x
    # shift endog by 10 to avoid strict rtol at predicted values near zero
    endog = cls.y + 10
    # common fit options shared by both models
    fit_kwds = dict(cov_type='HC0', method='bfgs', maxiter=100, disp=0)

    mod_ref = GLM(endog, exog[:, :cls.k_nonzero], family=family.Gaussian())
    cls.res2 = mod_ref.fit(**fit_kwds)

    mod_pen = GLMPenalized(endog, exog, family=family.Gaussian(),
                           penal=cls.penalty)
    mod_pen.pen_weight *= 1.5  # same as discrete Poisson
    mod_pen.penal.tau = 0.05
    cls.res1 = mod_pen.fit(trim=True, **fit_kwds)

    cls.exog_index = slice(None, cls.k_nonzero, None)
    cls.k_params = cls.k_nonzero
    cls.atol = 1e-5
    cls.rtol = 1e-5
def setup_class(cls):
    """Fit an L2-constrained penalized GLM matched to an R mgcv example.

    ``cls.res1`` is the GLMPenalized fit, ``cls.res2`` the stored R mgcv
    reference results (``results_pls.pls5``).
    """
    # penalty_matrix is returned by _init but not used in this setup
    exog, penalty_matrix, restriction = cls._init()
    endog = data_mcycle['accel']
    pen = smpen.L2ContraintsPenalty(restriction=restriction)
    mod = GLMPenalized(endog, exog, family=family.Gaussian(), penal=pen)
    # scaling of penweight in R mgcv
    s_scale_r = 0.02630734
    # set pen_weight to correspond to R mgcv example
    cls.pw = mod.pen_weight = 1 / s_scale_r / 2
    cls.res1 = mod.fit(cov_type=cls.cov_type, method='bfgs', maxiter=100,
                       disp=0, trim=False, scale='x2')
    cls.res2 = results_pls.pls5
    cls.rtol_fitted = 1e-5
    # edf is currently not available with PenalizedMixin
    # need correction for difference in scale denominator
    cls.covp_corrfact = 1.0025464444310588
def __init__(self, family_name='normal', link_name='identity',
             fam_params=None):
    """Constructor.

    Parameters
    ----------
    family_name : str
        Distribution family, one of 'normal', 'binomial', 'poisson'
        (case-insensitive).
    link_name : str
        Link function, one of 'logit', 'log', 'identity', 'sqrt',
        'probit' (case-insensitive).  A falsy value means no explicit
        link is passed to the family.
    fam_params : optional
        Extra family parameter; only used as the scale argument of the
        normal sampler.
        # NOTE(review): the default None makes np.random.normal fail for
        # the normal family — confirm callers always pass a value.

    Raises
    ------
    ValueError
        If ``family_name`` or a non-empty ``link_name`` is not recognized.
        (Previously an unknown name surfaced later as AttributeError /
        UnboundLocalError, which hid the actual cause.)
    """
    _links = {
        'logit': L.logit,
        'log': L.log,
        'identity': L.identity,
        'sqrt': L.sqrt,
        'probit': L.probit,
    }

    # Store link
    self.link_name = link_name
    family_kwargs = {}
    if self.link_name:
        try:
            self.link = _links[self.link_name.lower()]
        except KeyError:
            raise ValueError('unknown link_name: %r' % (link_name,))
        family_kwargs['link'] = self.link

    # Store family and the matching random sampler
    self.family_name = family_name
    fam = self.family_name.lower()
    if fam == 'normal':
        self.family = F.Gaussian(**family_kwargs)

        def rand(x):
            return np.random.normal(x, fam_params)
    elif fam == 'binomial':
        self.family = F.Binomial(**family_kwargs)

        def rand(x):
            return np.random.binomial(1, x)
    elif fam == 'poisson':
        self.family = F.Poisson(**family_kwargs)

        def rand(x):
            return np.random.poisson(x)
    else:
        raise ValueError('unknown family_name: %r' % (family_name,))

    self.rand = rand
    self.in_columns = None
    self.out_columns = None