Esempio n. 1
0
    def test_regularized_weights(self):
        """Regularized fits must agree across equivalent OLS/WLS/GLS setups.

        The first data block is used twice in the OLS design, which should
        be equivalent to using it once with weight 2 in WLS, or with
        ``sigma = diag(1 / weights)`` in GLS. Parameters are compared to
        three decimals for every combination of ``L1_wt`` and ``alpha``.
        """
        np.random.seed(1432)

        # The order of the random draws is part of the fixture: design
        # first, then the noise added to the response, block by block.
        x_first = np.random.normal(size=(100, 3))
        y_first = x_first[:, 0] + x_first[:, 1] + np.random.normal(size=100)
        x_second = np.random.normal(size=(100, 3))
        y_second = x_second[:, 0] + x_second[:, 1] + np.random.normal(size=100)

        # Formulation A: the first block appears twice.
        exog_a = np.vstack((x_first, x_first, x_second))
        endog_a = np.concatenate((y_first, y_first, y_second))

        # Formulation B: each block once, first block weighted by 2.
        exog_b = np.vstack((x_first, x_second))
        endog_b = np.concatenate((y_first, y_second))
        weights = np.repeat([2.0, 1.0], 100)
        sigma = np.diag(1 / weights)

        penalty_grid = [
            {"L1_wt": l1_share, "alpha": strength}
            for l1_share in (0, 0.5, 1)
            for strength in (0, 1)
        ]

        for penalty in penalty_grid:
            # All three fits are run before any comparison is made.
            reference, *alternatives = [
                OLS(endog_a, exog_a).fit_regularized(**penalty),
                WLS(endog_b, exog_b, weights=weights).fit_regularized(
                    **penalty),
                GLS(endog_b, exog_b, sigma=sigma).fit_regularized(**penalty),
            ]

            for alternative in alternatives:
                assert_almost_equal(reference.params, alternative.params,
                                    decimal=3)
Esempio n. 2
0
 def setup_class(cls):
     """Fit GLS and OLS on the longley data with a trailing constant.

     The constant column is appended (``prepend=False``) and written back
     onto the loaded dataset object; results are stored on the class as
     ``res1`` (GLS) and ``res2`` (OLS).
     """
     dataset = longley.load(as_pandas=False)
     dataset.exog = add_constant(dataset.exog, prepend=False)

     endog, exog = dataset.endog, dataset.exog
     cls.res1 = GLS(endog, exog).fit()
     cls.res2 = OLS(endog, exog).fit()
Esempio n. 3
0
 def setup_class(cls):
     """Fit WLS and GLS on the ccard data using the third exog column.

     ``res1`` is a WLS fit whose weights are the reciprocal of the third
     exog column; ``res2`` is a GLS fit that receives the same column as
     ``sigma``.
     """
     from statsmodels.datasets.ccard import load as load_ccard

     ccard = load_ccard(as_pandas=False)
     endog, exog = ccard.endog, ccard.exog
     third_col = exog[:, 2]

     cls.res1 = WLS(endog, exog, weights=1 / third_col).fit()
     cls.res2 = GLS(endog, exog, sigma=third_col).fit()
Esempio n. 4
0
    def fit(self, pen_weight=1., cov_type='sandwich', use_t=True):
        """Estimate parameters and return results instance

        Parameters
        ----------
        pen_weight : float
            penalization factor for the restriction, default is 1.
        cov_type : string, 'data-prior' or 'sandwich'
            'data-prior' assumes that the stochastic restriction reflects a
            previous sample. The covariance matrix of the parameter estimate
            is in this case the same form as the one of GLS.
            The covariance matrix for cov_type='sandwich' treats the stochastic
            restriction (R and q) as fixed and has a sandwich form analogously
            to M-estimators. Default is 'sandwich'.
        use_t : bool
            Passed through unchanged to TheilRegressionResults.
            Default is True.

        Returns
        -------
        results : TheilRegressionResults instance

        Raises
        ------
        ValueError
            If cov_type is neither 'sandwich' nor 'data-prior'. The check is
            done before any estimation, so no attribute of the model is
            modified when it fails.

        Notes
        -----
        cov_params for cov_type data-prior, is calculated as

        .. math:: \\sigma^2 A^{-1}

        cov_params for cov_type sandwich, is calculated as

        .. math:: \\sigma^2 A^{-1} (X'X) A^{-1}

        where :math:`A = X' \\Sigma X + \\lambda \\sigma^2 R' \\Sigma_p^{-1} R`

        :math:`\\sigma^2` is an estimate of the error variance.
        :math:`\\sigma^2` inside A is replaced by the estimate from the initial
        GLS estimate. :math:`\\sigma^2` in cov_params is obtained from the
        residuals of the final estimate.

        The sandwich form of the covariance estimator is not robust to
        misspecified heteroscedasticity or autocorrelation.

        In addition to returning the results instance, this method stores
        ``res_gls``, ``normalized_cov_params``, ``xpxi`` and ``sigma2_e`` on
        the model.

        """
        # Fail fast: reject an unknown cov_type before the GLS fit and the
        # matrix algebra, so a bad argument neither wastes work nor leaves
        # the model partially updated.
        if cov_type not in ('sandwich', 'data-prior'):
            raise ValueError("cov_type has to be 'sandwich' or 'data-prior'")

        lambd = pen_weight
        # this does duplicate transformation, but I need resid not wresid
        res_gls = GLS(self.endog, self.exog, sigma=self.sigma).fit()
        self.res_gls = res_gls
        sigma2_e = res_gls.mse_resid

        r_matrix = self.r_matrix
        q_matrix = self.q_matrix
        sigma_prior_inv = self.sigma_prior_inv
        x = self.wexog
        y = self.wendog[:, None]

        # why are sigma2_e * lambd multiplied, not ratio?
        # larger lambd -> stronger prior  (it's not the variance)
        # Bayesian: lambd is precision = 1/sigma2_prior
        penalty_scale = sigma2_e * lambd
        xx = np.dot(x.T, x)
        xpx = xx + \
              penalty_scale * np.dot(r_matrix.T, np.dot(sigma_prior_inv, r_matrix))
        xpy = np.dot(x.T, y) + \
              penalty_scale * np.dot(r_matrix.T, np.dot(sigma_prior_inv, q_matrix))

        xpxi = np.linalg.pinv(xpx, rcond=1e-15**2)  # to match pinv(x) in OLS case
        xpxi_sandwich = xpxi.dot(xx).dot(xpxi)
        params = np.dot(xpxi, xpy)    # or solve
        params = np.squeeze(params)

        # cov_type was validated on entry, so anything other than 'sandwich'
        # is 'data-prior' here.
        if cov_type == 'sandwich':
            normalized_cov_params = xpxi_sandwich
        else:
            normalized_cov_params = xpxi

        # NOTE(review): the model attribute always receives the sandwich
        # form, independent of cov_type; only the results instance gets the
        # cov_type-specific matrix. Kept as is because other code may read
        # the model attribute -- confirm whether this is intended.
        self.normalized_cov_params = xpxi_sandwich
        self.xpxi = xpxi
        self.sigma2_e = sigma2_e
        lfit = TheilRegressionResults(self, params,
                       normalized_cov_params=normalized_cov_params, use_t=use_t)

        lfit.penalization_factor = lambd
        return lfit
Esempio n. 5
0
 def setupClass(cls):
     """Fit GLS and OLS on the longley data after adding a constant.

     ``longley.load`` and ``add_constant`` are both called with their
     default options; the augmented design is written back onto the
     loaded dataset object. Results are stored on the class as ``res1``
     (GLS) and ``res2`` (OLS).
     """
     dataset = longley.load()
     dataset.exog = add_constant(dataset.exog)

     endog, exog = dataset.endog, dataset.exog
     cls.res1 = GLS(endog, exog).fit()
     cls.res2 = OLS(endog, exog).fit()
Esempio n. 6
0
        # Rows of df2X for this state, restricted to the predictor columns
        # and divided by 100.
        sec_mobility_by_state.append(
            df2X.loc[df2X.state == state, predictors].values / 100)
        # Same selection for the matching '<predictor>_std' columns.
        sec_mobility_std_by_state.append(
            df2X.loc[df2X.state == state, [val + '_std'
                                           for val in predictors]].values /
            100)
        # This state's column of the survey tables, sliced by label from
        # sec_start_date to sec_end_date.
        sec_count_by_state.append(
            survey_counts.loc[sec_start_date:sec_end_date, state].values)
        sec_respond_by_state.append(
            survey_respond.loc[sec_start_date:sec_end_date, state].values)

    # A list of ones with one entry per df2X row of the first state in
    # sec_states.
    policy_v = [1] * df2X.loc[df2X.state == sec_states[0]].shape[0]
    # 'post_policy' values of dfX for the first state in states_to_fit.
    policy = dfX.loc[dfX.state == states_to_fit[0], 'post_policy']

    # 1-based index for every state, in the order of states_to_fit.
    state_index = {state: i + 1 for i, state in enumerate(states_to_fit)}
    # Setup Design matrix

    # fit model

    # NOTE(review): y, X and X_forecast are not defined in the visible part
    # of this function, and no .fit() call is made before predict() below.
    # If GLS is the statsmodels class, predict() on the model (rather than
    # on a fitted results object) expects the parameter vector as its first
    # argument -- confirm that this runs as intended.
    model = GLS(y, X, sigma=None)

    # generate predictions

    backcast = model.predict(exog=X)

    # predict using forecasted X data

    forecast = model.predict(exog=X_forecast)
    # plot results

    # calculate crps