def test_regularized_weights(self):
    """Check that row duplication and case weights give matching fits.

    Stacking the first sample twice in OLS should be equivalent to
    weighting it by 2 in WLS, and to GLS with the corresponding
    diagonal sigma, for every combination of L1_wt and alpha.
    """
    np.random.seed(1432)

    # First sample: y = x0 + x1 + noise
    x_first = np.random.normal(size=(100, 3))
    y_first = x_first[:, 0] + x_first[:, 1] + np.random.normal(size=100)
    # Second, independent sample with the same data-generating process
    x_second = np.random.normal(size=(100, 3))
    y_second = x_second[:, 0] + x_second[:, 1] + np.random.normal(size=100)

    # Design with the first sample physically duplicated ...
    x_dup = np.vstack((x_first, x_first, x_second))
    y_dup = np.concatenate((y_first, y_first, y_second))
    # ... versus each sample appearing once, duplication expressed as weights.
    x_once = np.vstack((x_first, x_second))
    y_once = np.concatenate((y_first, y_second))

    weights = np.ones(200)
    weights[0:100] = 2
    cov = np.diag(1 / weights)

    for lw in (0, 0.5, 1):
        for a in (0, 1):
            fit_dup = OLS(y_dup, x_dup).fit_regularized(L1_wt=lw, alpha=a)
            fit_wls = WLS(y_once, x_once,
                          weights=weights).fit_regularized(L1_wt=lw, alpha=a)
            fit_gls = GLS(y_once, x_once,
                          sigma=cov).fit_regularized(L1_wt=lw, alpha=a)
            assert_almost_equal(fit_dup.params, fit_wls.params, decimal=3)
            assert_almost_equal(fit_dup.params, fit_gls.params, decimal=3)
def setup_class(cls):
    """Fit GLS and OLS on the Longley data for comparison in the tests."""
    dataset = longley.load(as_pandas=False)
    dataset.exog = add_constant(dataset.exog, prepend=False)
    y, x = dataset.endog, dataset.exog
    cls.res1 = GLS(y, x).fit()
    cls.res2 = OLS(y, x).fit()
def setup_class(cls):
    """Fit WLS and the equivalent GLS on the ccard data.

    Weighting by 1/x2 in WLS corresponds to a diagonal GLS sigma of x2,
    so the two results should agree.
    """
    from statsmodels.datasets.ccard import load
    dataset = load(as_pandas=False)
    scale = dataset.exog[:, 2]
    cls.res1 = WLS(dataset.endog, dataset.exog, weights=1 / scale).fit()
    cls.res2 = GLS(dataset.endog, dataset.exog, sigma=scale).fit()
def fit(self, pen_weight=1., cov_type='sandwich', use_t=True):
    """Estimate parameters and return results instance

    Parameters
    ----------
    pen_weight : float
        penalization factor for the restriction, default is 1.
    cov_type : string, 'data-prior' or 'sandwich'
        'data-prior' assumes that the stochastic restriction reflects a
        previous sample. The covariance matrix of the parameter estimate
        is in this case the same form as the one of GLS.
        The covariance matrix for cov_type='sandwich' treats the stochastic
        restriction (R and q) as fixed and has a sandwich form analogously
        to M-estimators.
    use_t : bool
        whether the results instance uses the t distribution for inference.

    Returns
    -------
    results : TheilRegressionResults instance

    Notes
    -----
    cov_params for cov_type data-prior, is calculated as

    .. math:: \\sigma^2 A^{-1}

    cov_params for cov_type sandwich, is calculated as

    .. math:: \\sigma^2 A^{-1} (X'X) A^{-1}

    where :math:`A = X' \\Sigma X + \\lambda \\sigma^2 R' \\Sigma_p^{-1} R`

    :math:`\\sigma^2` is an estimate of the error variance.
    :math:`\\sigma^2` inside A is replaced by the estimate from the initial
    GLS estimate. :math:`\\sigma^2` in cov_params is obtained from the
    residuals of the final estimate.

    The sandwich form of the covariance estimator is not robust to
    misspecified heteroscedasticity or autocorrelation.
    """
    lambd = pen_weight
    # This does a duplicate transformation, but resid (not wresid) is
    # needed here, so refit a plain GLS first to get mse_resid.
    res_gls = GLS(self.endog, self.exog, sigma=self.sigma).fit()
    self.res_gls = res_gls
    sigma2_e = res_gls.mse_resid

    r_matrix = self.r_matrix
    q_matrix = self.q_matrix
    sigma_prior_inv = self.sigma_prior_inv
    x = self.wexog
    y = self.wendog[:,None]
    # NOTE(review): sigma2_e and lambd enter multiplicatively (not as a
    # ratio); open question left by the original author.
    # Larger lambd -> stronger prior (lambd is not the prior variance).
    # Bayesian interpretation: lambd is a precision, 1/sigma2_prior.
    xx = np.dot(x.T, x)
    # Penalized normal equations: (X'X + s2*lambd*R' Sp^{-1} R) b =
    #                              X'y + s2*lambd*R' Sp^{-1} q
    xpx = xx + \
          sigma2_e * lambd * np.dot(r_matrix.T, np.dot(sigma_prior_inv, r_matrix))
    xpy = np.dot(x.T, y) + \
          sigma2_e * lambd * np.dot(r_matrix.T, np.dot(sigma_prior_inv, q_matrix))
    # rcond chosen to match pinv(x) behavior in the OLS case
    xpxi = np.linalg.pinv(xpx, rcond=1e-15**2)
    xpxi_sandwich = xpxi.dot(xx).dot(xpxi)
    params = np.dot(xpxi, xpy)    # alternatively: np.linalg.solve
    params = np.squeeze(params)

    # normalized_cov_params: sandwich form is xpxi @ xx @ xpxi
    if cov_type == 'sandwich':
        normalized_cov_params = xpxi_sandwich
    elif cov_type == 'data-prior':
        normalized_cov_params = xpxi
    else:
        raise ValueError("cov_type has to be 'sandwich' or 'data-prior'")

    # NOTE(review): the sandwich form is attached to the model
    # unconditionally, even when cov_type='data-prior' selects xpxi for
    # the results instance — confirm this asymmetry is intentional.
    self.normalized_cov_params = xpxi_sandwich
    self.xpxi = xpxi
    self.sigma2_e = sigma2_e
    lfit = TheilRegressionResults(self, params,
                       normalized_cov_params=normalized_cov_params,
                       use_t=use_t)

    lfit.penalization_factor = lambd
    return lfit
def setupClass(cls):
    """Fit GLS and OLS on the Longley data (with constant) for comparison."""
    dataset = longley.load()
    dataset.exog = add_constant(dataset.exog)
    y, x = dataset.endog, dataset.exog
    cls.res1 = GLS(y, x).fit()
    cls.res2 = OLS(y, x).fit()
# Collect per-state predictor and survey series for the secondary period.
sec_mobility_by_state.append(
    df2X.loc[df2X.state == state, predictors].values / 100)
sec_mobility_std_by_state.append(
    df2X.loc[df2X.state == state,
             [val + '_std' for val in predictors]].values / 100)
sec_count_by_state.append(
    survey_counts.loc[sec_start_date:sec_end_date, state].values)
sec_respond_by_state.append(
    survey_respond.loc[sec_start_date:sec_end_date, state].values)

policy_v = [1] * df2X.loc[df2X.state == sec_states[0]].shape[0]
policy = dfX.loc[dfX.state == states_to_fit[0], 'post_policy']
# 1-based state index, e.g. for a Stan/categorical encoding
state_index = {state: i + 1 for i, state in enumerate(states_to_fit)}

# Setup Design matrix

# Fit model (sigma=None makes GLS equivalent to OLS).
model = GLS(y, X, sigma=None)
results = model.fit()

# Generate predictions. statsmodels' RegressionModel.predict requires
# the fitted params as its first argument, so predicting from the unfit
# model (model.predict(exog=X)) raises TypeError — predict from the
# fitted results instead.
backcast = results.predict(X)
# Predict using forecasted X data
forecast = results.predict(X_forecast)

# plot results

# calculate crps