Beispiel #1
0
    def _fit_start_params_hr(self, order):
        """
        Compute a first guess for the parameter vector used to start the fit.

        Parameters
        ----------
        order : iterable
            (p,q,k) - number of AR lags, number of MA lags, and number of
            exogenous variables including the constant.

        Returns
        -------
        start_params : array
            Array of length p + q + k laid out as
            [exog coefficients, AR coefficients, MA coefficients].

        Notes
        -----
        When exogenous variables are present their coefficients come from a
        separate least-squares regression, and the remaining steps work on
        the residual of that regression.  For a mixed ARMA(p,q) model an AR
        process is fitted with the lag length chosen by BIC, its residuals
        are computed, and the lagged series and lagged residuals are then
        regressed on jointly to approximate the AR and MA coefficients.
        When only one of p and q is nonzero, the Yule-Walker estimate is used.

        References
        ----------
        Hannan, E.J. and Rissanen, J.  1982.  "Recursive estimation of mixed
            autoregressive-moving average order."  `Biometrika`.  69.1.
        """
        n_ar, n_ma, n_exog = order
        start_params = zeros((n_ar + n_ma + n_exog))
        # work on a copy: the series is modified in place below
        series = self.endog.copy()
        exog = self.exog

        if n_exog != 0:
            exog_coefs = GLS(series, exog).fit().params
            start_params[:n_exog] = exog_coefs
            # remove the exogenous contribution before estimating the ARMA part
            series -= np.dot(exog, exog_coefs).squeeze()

        if n_ma != 0 and n_ar != 0:
            # Step 1: AR fit with BIC-selected lag length, no trend term
            ar_fit = AR(series).fit(ic='bic', trend='nc')
            ar_coefs = ar_fit.params
            ar_lags = ar_fit.k_ar
            ar_prediction = np.dot(lagmat(series, ar_lags, trim='both'),
                                   ar_coefs)
            resid = series[ar_lags:] - ar_prediction

            # Step 2: align the two lag matrices so they cover the same rows
            shift = ar_lags + n_ma - n_ar
            if shift > 0:
                endog_start, resid_start = shift, 0
            else:
                endog_start, resid_start = 0, -shift
            lagged_series = lagmat(series, n_ar, 'both')[endog_start:]
            lagged_resid = lagmat(resid, n_ma, 'both')[resid_start:]

            # Step 3: regress on AR lags and lagged residuals side by side
            regressors = np.column_stack((lagged_series, lagged_resid))
            first_obs = max(ar_lags + n_ma, n_ar)
            arma_coefs = GLS(series[first_obs:], regressors).fit().params
            start_params[n_exog:n_exog + n_ar + n_ma] = arma_coefs
        elif n_ma != 0:
            # pure MA: n_ar is zero here, kept in the slice for symmetry
            ma_begin = n_exog + n_ar
            start_params[ma_begin:ma_begin + n_ma] = yule_walker(
                series, order=n_ma)[0]
        elif n_ar != 0:
            # pure AR
            start_params[n_exog:n_exog + n_ar] = yule_walker(
                series, order=n_ar)[0]
        return start_params
Beispiel #2
0
    def fit(self, lambd=1.):
        """
        Estimate the parameters with the prior restrictions mixed in.

        Parameters
        ----------
        lambd : float
            Penalization factor multiplying the prior term; a larger value
            gives the prior more weight (it is not a variance).

        Returns
        -------
        TheilRegressionResults
            Results instance built from the estimated parameters, with the
            attribute ``penalization_factor`` set to `lambd`.

        Notes
        -----
        Stores the preliminary GLS results in ``self.res_gls`` and the
        pseudo-inverse of the penalized moment matrix in
        ``self.normalized_cov_params``.
        """
        # A separate GLS fit repeats the whitening transformation, but it
        # provides the residual variance of the unrestricted model.
        gls_fit = GLS(self.endog, self.exog, sigma=self.sigma).fit()
        self.res_gls = gls_fit
        resid_var = gls_fit.mse_resid

        restriction = self.r_matrix
        target = self.q_matrix
        prior_precision = self.sigma_prior_inv
        wx = self.wexog
        wy = self.wendog[:, None]

        # The prior term enters multiplied by (residual variance * lambd).
        scale = resid_var * lambd
        prior_xpx = np.dot(restriction.T, np.dot(prior_precision, restriction))
        prior_xpy = np.dot(restriction.T, np.dot(prior_precision, target))
        xpx = np.dot(wx.T, wx) + scale * prior_xpx
        xpy = np.dot(wx.T, wy) + scale * prior_xpy

        xpx_pinv = np.linalg.pinv(xpx)
        params = np.squeeze(np.dot(xpx_pinv, xpy))
        self.normalized_cov_params = xpx_pinv

        results = TheilRegressionResults(self, params,
                                         normalized_cov_params=xpx_pinv)
        results.penalization_factor = lambd
        return results
Beispiel #3
0
 def setupClass(cls):
     """Fit OLS and GLS (without sigma) on the Longley data with a constant.

     The GLS results are stored as ``cls.res1`` and the OLS results as
     ``cls.res2``.
     """
     longley_data = longley.load()
     longley_data.exog = add_constant(longley_data.exog)
     y, design = longley_data.endog, longley_data.exog
     cls.res2 = OLS(y, design).fit()
     cls.res1 = GLS(y, design).fit()
Beispiel #4
0
 def fit(self):
     """
     Fit each equation separately and collect the parameter estimates.

     Returns
     -------
     list
         One entry per equation (``self._M`` in total): the ``params`` of a
         GLS fit of ``self.endog[j]`` on ``self.wexog[j]``.
     """
     lhs = self.endog
     rhs = self.wexog
     return [GLS(lhs[eq], rhs[eq]).fit().params for eq in range(self._M)]
Beispiel #5
0
    def __init__(self, sys, sigma=None, dfk=None):
        if len(sys) % 2 != 0:
            raise ValueError("sys must be a list of pairs of endogenous and \
exogenous variables.  Got length %s" % len(sys))
        if dfk:
            if not dfk.lower() in ['dfk1', 'dfk2']:
                raise ValueError("dfk option %s not understood" % (dfk))
        self._dfk = dfk
        M = len(sys[1::2])
        self._M = M
        #        exog = np.zeros((M,M), dtype=object)
        #        for i,eq in enumerate(sys[1::2]):
        #            exog[i,i] = np.asarray(eq)  # not sure this exog is needed
        # used to compute resids for now
        exog = np.column_stack(np.asarray(sys[1::2][i]) for i in range(M))
        #       exog = np.vstack(np.asarray(sys[1::2][i]) for i in range(M))
        self.exog = exog  # 2d ndarray exog is better
        # Endog, might just go ahead and reshape this?
        endog = np.asarray(sys[::2])
        self.endog = endog
        self.nobs = float(
            self.endog[0].shape[0])  # assumes all the same length

        # Degrees of Freedom
        df_resid = []
        df_model = []
        [df_resid.append(self.nobs - tools.rank(_)) \
                for _ in sys[1::2]]
        [df_model.append(tools.rank(_) - 1) for _ in sys[1::2]]
        self.df_resid = np.asarray(df_resid)
        self.df_model = np.asarray(df_model)

        # "Block-diagonal" sparse matrix of exog
        sp_exog = sparse.lil_matrix(
            (int(self.nobs * M),
             int(np.sum(self.df_model + 1))))  # linked lists to build
        self._cols = np.cumsum(np.hstack((0, self.df_model + 1)))
        for i in range(M):
            sp_exog[i * self.nobs:(i + 1) * self.nobs,
                    self._cols[i]:self._cols[i + 1]] = sys[1::2][i]
        self.sp_exog = sp_exog.tocsr()  # cast to compressed for efficiency
        # Deal with sigma, check shape earlier if given
        if np.any(sigma):
            sigma = np.asarray(sigma)  # check shape
        elif sigma == None:
            resids = []
            for i in range(M):
                resids.append(
                    GLS(endog[i],
                        exog[:, self._cols[i]:self._cols[i + 1]]).fit().resid)
            resids = np.asarray(resids).reshape(M, -1)
            sigma = self._compute_sigma(resids)
        self.sigma = sigma
        self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(\
                    self.sigma)).T
        self.initialize()
Beispiel #6
0
    def _fit_btwn(self, method, effects):
        # group mean regression or WLS
        if effects != "twoway":
            endog = self._group_mean(self.endog, index=effects)
            exog = self._group_mean(self.exog, index=effects)
        else:
            raise ValueError("%s effects is not valid for the between \
estimator" % s)
        befit = GLS(endog, exog).fit()
        return befit
Beispiel #7
0
    def setupClass(cls):
        """Fit GLS on two Longley regressors with a Toeplitz sigma.

        The correlation between consecutive OLS residuals is used as rho,
        and sigma is ``rho**|i-j|`` on a 16 x 16 grid.  GLS results go to
        ``cls.res1``, the reference ``LongleyGls`` results to ``cls.res2``.
        """
        from results.results_regression import LongleyGls

        longley_data = longley.load()
        selected = np.column_stack(
            (longley_data.exog[:, 1], longley_data.exog[:, 4]))
        exog = add_constant(selected)

        ols_resid_fit = OLS(longley_data.endog, exog).fit()
        # lag-one residual correlation, by assumption
        lag_corr = np.corrcoef(ols_resid_fit.resid[1:],
                               ols_resid_fit.resid[:-1])
        rho = lag_corr[0][1]
        sigma = rho**toeplitz(np.arange(16))

        cls.res1 = GLS(longley_data.endog, exog, sigma=sigma).fit()
        cls.res2 = LongleyGls()
Beispiel #8
0
    def fit(self, lambd=1.):
        """
        Two-step estimate with weights taken from a residual regression.

        A preliminary GLS fit supplies residuals; their squares are regressed
        on ``self.exog_var`` by OLS, and the reciprocal of the fitted values
        of that regression is used as the weights of a final WLS fit.

        Parameters
        ----------
        lambd : float
            Not used by this method.

        Returns
        -------
        The WLS results, with the residual-regression results attached as
        ``_results.results_residual_regression``.
        """
        # step 1: preliminary estimate (no iteration)
        preliminary = GLS(self.endog, self.exog, sigma=self.sigma).fit()

        # step 2: model the squared residuals (identity link, not log)
        variance_fit = OLS(preliminary.resid**2, self.exog_var).fit()

        # step 3: delegate to WLS rather than whitening with this instance
        weights = 1. / variance_fit.fittedvalues
        weighted_fit = WLS(self.endog, self.exog, weights=weights).fit()

        weighted_fit._results.results_residual_regression = variance_fit
        return weighted_fit
Beispiel #9
0
    def fit(self, model=None, method=None, effects='oneway'):
        """
        Dispatch to the estimator selected by `model`.

        Parameters
        ----------
        model : str
            Required, case-insensitive.  'pooled', 'between' and 'fixed' are
            dispatched; any other value (e.g. 'random', 'gmm') falls through
            and returns None.
        method : str, optional
            One of LSDV, demeaned, MLE, GLS, BE, FE (case-insensitive).  It is
            validated and passed on to the between/fixed estimators.
        effects : str
            'oneway', 'time' or 'twoway'; passed on to the between/fixed
            estimators.

        Returns
        -------
        The results of the selected estimator, or None when `model` is not
        one of the dispatched values.

        Raises
        ------
        ValueError
            If `model` is None or `method` is not a recognised name.

        Notes
        ------
        This is unfinished.  None of the method arguments work yet.
        Only oneway effects should work.
        """
        if model is None:
            # the default would otherwise fail on model.lower() below
            raise ValueError("model must be specified, e.g. 'pooled', "
                             "'between' or 'fixed'")
        if method:  # get rid of this with default
            method = method.lower()
        model = model.lower()
        if method and method not in ["lsdv", "demeaned", "mle", "gls", "be",
                                     "fe"]:
            raise ValueError("%s not a valid method" % method)

        if model == 'pooled':
            return GLS(self.endog, self.exog).fit()
        if model == 'between':
            return self._fit_btwn(method, effects)
        if model == 'fixed':
            return self._fit_fixed(method, effects)
        # remaining models are not implemented
        return None
Beispiel #10
0
    def whiten(self, Y):
        """
        Runs the first stage of the 2SLS.

        Parameters
        ----------
        Y : dict
            Maps an equation number to an iterable of left-hand-side
            variables to be projected on the instruments.  Equations without
            an entry are left unchanged.

        Returns
        -------
        list
            One entry per equation: a copy of ``self.exog[eq]`` in which the
            columns listed in ``self._indep_endog[eq]`` are replaced by the
            fitted values of regressing the matching variable on
            ``self.instruments``.
        """
        wexog = []
        indep_endog = self._indep_endog  # this has the col mapping
        instruments = self.instruments
        # need to go through all equations regardless
        for eq in range(self._M):
            instr_eq = Y.get(eq)  # Y has the eq to ind endog array map
            newRHS = self.exog[eq].copy()
            # Test against None: the truth value of a multi-element array
            # is ambiguous and would raise.
            if instr_eq is not None:
                for i, LHS in enumerate(instr_eq):
                    yhat = GLS(LHS, instruments).fit().fittedvalues
                    # this might fail if there is a one variable column
                    # (nobs,) in exog
                    newRHS[:, indep_endog[eq][i]] = yhat
            wexog.append(newRHS)
        return wexog
Beispiel #11
0
    def _fit_fixed(self, method, effects):
        """
        Fit the fixed effects estimator on demeaned data.

        Parameters
        ----------
        method : object
            Not used by this method.
        effects : str
            "oneway" demeans by the "oneway" grouping, "time" demeans by the
            "time" grouping, and "twoway" (also spelled "twoways") does both
            in that order.  Any other value skips the demeaning.

        Returns
        -------
        Results of a GLS fit of the demeaned ``self.endog`` on the columns of
        the demeaned ``self.exog`` selected by ``-self._cons_index``.
        """
        endog = self.endog
        exog = self.exog
        # Accept "twoway" as well: that is the spelling used by the public
        # fit docstring and by the between estimator.
        demeantwice = effects in ("twoway", "twoways")
        if demeantwice:
            effects = "oneway"
        if effects == "oneway":
            endog_mean, counts = self._group_mean(endog, index=effects,
                counts=True)
            exog_mean = self._group_mean(exog, index=effects)
            counts = counts.astype(int)
            # expand each group mean to one row per observation in the group
            endog = endog - np.repeat(endog_mean, counts)
            exog = exog - np.repeat(exog_mean, counts, axis=0)
        if demeantwice or effects == "time":
            endog_mean, dummies = self._group_mean(endog, index="time",
                dummies=True)
            exog_mean = self._group_mean(exog, index="time")
            # This allows unbalanced panels
            endog = endog - np.dot(endog_mean, dummies)
            exog = exog - np.dot(dummies.T, exog_mean)
        # TODO: might fail with one regressor
        # NOTE(review): presumably _cons_index marks the constant column and
        # the minus sign is meant to exclude it -- confirm; unary minus on a
        # boolean mask is not supported by current NumPy.
        fefit = GLS(endog, exog[:,-self._cons_index]).fit()
        return fefit
Beispiel #12
0
 def setupClass(cls):
     """Fit WLS and GLS on the ccard data using exog column 2 as the scale.

     WLS gets the reciprocal of the column as weights (``cls.res1``); GLS
     gets the column itself as sigma (``cls.res2``).
     """
     from gwstatsmodels.datasets.ccard import load
     ccard = load()
     y, design = ccard.endog, ccard.exog
     cls.res1 = WLS(y, design, weights=1 / design[:, 2]).fit()
     cls.res2 = GLS(y, design, sigma=design[:, 2]).fit()