def _fit_start_params_hr(self, order):
    """
    Get starting parameters for fit.

    Parameters
    ----------
    order : iterable
        (p,q,k) - AR lags, MA lags, and number of exogenous variables
        including the constant.

    Returns
    -------
    start_params : array
        A first guess at the starting parameters.

    Notes
    -----
    If necessary, fits an AR process with the lag length selected according
    to best BIC and obtains the residuals.  Then fits an ARMA(p,q) model via
    OLS using these residuals for a first approximation.  Uses a separate
    OLS regression to find the coefficients of exogenous variables.

    References
    ----------
    Hannan, E.J. and Rissanen, J. 1982. "Recursive estimation of mixed
    autoregressive-moving average order."  `Biometrika`.  69.1.
    """
    p, q, k = order
    start_params = np.zeros(p + q + k)
    endog = self.endog.copy()  # copy because overwritten
    exog = self.exog
    if k != 0:
        ols_params = GLS(endog, exog).fit().params
        start_params[:k] = ols_params
        endog -= np.dot(exog, ols_params).squeeze()
    if q != 0:
        if p != 0:
            # fit a long AR process with lag length chosen by BIC, then
            # use its residuals as proxies for the innovations
            armod = AR(endog).fit(ic='bic', trend='nc')
            arcoefs_tmp = armod.params
            p_tmp = armod.k_ar
            resid = endog[p_tmp:] - np.dot(lagmat(endog, p_tmp, trim='both'),
                                           arcoefs_tmp)
            # align the lagged endog and lagged resid so both start at the
            # same observation
            if p < p_tmp + q:
                endog_start = p_tmp + q - p
                resid_start = 0
            else:
                endog_start = 0
                resid_start = p - p_tmp - q
            lag_endog = lagmat(endog, p, 'both')[endog_start:]
            lag_resid = lagmat(resid, q, 'both')[resid_start:]
            # stack ar lags and resids
            X = np.column_stack((lag_endog, lag_resid))
            coefs = GLS(endog[max(p_tmp + q, p):], X).fit().params
            start_params[k:k + p + q] = coefs
        else:
            start_params[k + p:k + p + q] = yule_walker(endog, order=q)[0]
    if q == 0 and p != 0:
        arcoefs = yule_walker(endog, order=p)[0]
        start_params[k:k + p] = arcoefs
    return start_params
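# For reference, the Hannan-Rissanen two-step recursion implemented above can
# be exercised standalone.  This is a minimal sketch, not the method's actual
# entry point: the simulated ARMA(1,1) series, the long-AR order p_long, and
# all names below are made up, and plain numpy least squares stands in for GLS.
import numpy as np
from statsmodels.tsa.tsatools import lagmat

rng = np.random.default_rng(12345)
eps = rng.standard_normal(500)
y = np.zeros(500)
for t in range(1, 500):
    y[t] = 0.6 * y[t - 1] + eps[t] + 0.3 * eps[t - 1]

# step 1: long AR regression by OLS to proxy the innovations
p_long = 10
Xar = lagmat(y, p_long, trim='both')
yar = y[p_long:]
ar_long = np.linalg.lstsq(Xar, yar, rcond=None)[0]
resid = yar - Xar @ ar_long

# step 2: regress y on its own lags and on lags of the step-1 residuals
p, q = 1, 1
lag_y = lagmat(y, p, trim='both')[p_long + q - p:]
lag_e = lagmat(resid, q, trim='both')
X = np.column_stack((lag_y, lag_e))
coefs = np.linalg.lstsq(X, y[p_long + q:], rcond=None)[0]
print(coefs)  # roughly (0.6, 0.3)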
def _stackX(self, k_ar, trend):
    """
    Private method to build the RHS matrix for estimation.

    Columns are trend terms then lags.
    """
    endog = self.endog
    X = lagmat(endog, maxlag=k_ar, trim='both')
    k_trend = util.get_trendorder(trend)
    if k_trend:
        X = add_trend(X, prepend=True, trend=trend)
    self.k_trend = k_trend
    return X
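# A toy illustration of what _stackX assembles, using the same lagmat and
# add_trend helpers; the series y and the lag order are placeholders.
import numpy as np
from statsmodels.tsa.tsatools import lagmat, add_trend

y = np.arange(10.0)
k_ar = 2
X = lagmat(y, maxlag=k_ar, trim='both')    # columns are y[t-1], y[t-2]
X = add_trend(X, prepend=True, trend='c')  # prepend the constant column
print(X[:3])
# [[1. 1. 0.]
#  [1. 2. 1.]
#  [1. 3. 2.]]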
def fit(self, nlags):
    """estimate parameters using ols

    Parameters
    ----------
    nlags : integer
        number of lags to include in regression, same for all variables

    Returns
    -------
    None, but attaches

    arhat : array (nlags+1, nvar, nvar)
        full lag polynomial array
    arlhs : array (nlags, nvar, nvar)
        reduced lag polynomial for left hand side
    other statistics as returned by linalg.lstsq : need to be completed

    This currently assumes all parameters are estimated without restrictions.
    In this case SUR is identical to OLS.

    Estimation results are attached to the class instance.
    """
    self.nlags = nlags  # without current period

    nvars = self.nvars
    # TODO: ar2s looks like a module variable, bug?
    # lmat = lagmat(ar2s, nlags, trim='both', original='in')
    lmat = lagmat(self.y, nlags, trim='both', original='in')
    self.yred = lmat[:, :nvars]
    self.xred = lmat[:, nvars:]
    res = np.linalg.lstsq(self.xred, self.yred, rcond=None)
    self.estresults = res
    self.arlhs = res[0].reshape(nlags, nvars, nvars)
    self.arhat = ar2full(self.arlhs)
    self.rss = res[1]
    self.xredrank = res[2]
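# Usage sketch for the core of fit above, on hypothetical data: plain white
# noise, so the estimated lag coefficients should be near zero.
import numpy as np
from statsmodels.tsa.tsatools import lagmat

rng = np.random.default_rng(0)
y = rng.standard_normal((200, 2))
nvars, nlags = 2, 1
lmat = lagmat(y, nlags, trim='both', original='in')
yred = lmat[:, :nvars]   # current values, the regressands
xred = lmat[:, nvars:]   # stacked lags, the regressors
bhat = np.linalg.lstsq(xred, yred, rcond=None)[0]
print(bhat.reshape(nlags, nvars, nvars))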
def _stackX(self, k_ar, trend):
    """
    Private method to build the RHS matrix for estimation.

    Columns are trend terms, then exogenous, then lags.
    """
    endog = self.endog
    exog = self.exog
    X = lagmat(endog, maxlag=k_ar, trim='both')
    if exog is not None:
        X = np.column_stack((exog[k_ar:, :], X))
    # Handle trend terms
    if trend == 'c':
        k_trend = 1
    elif trend == 'nc':
        k_trend = 0
    elif trend == 'ct':
        k_trend = 2
    elif trend == 'ctt':
        k_trend = 3
    else:
        raise ValueError("trend must be one of 'c', 'nc', 'ct', 'ctt'")
    if trend != 'nc':
        X = add_trend(X, prepend=True, trend=trend)
    self.k_trend = k_trend
    return X
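# A toy illustration of the exog-then-lags stacking above; endog, exog, and
# k_ar are placeholders.
import numpy as np
from statsmodels.tsa.tsatools import lagmat, add_trend

endog = np.arange(8.0)
exog = np.arange(8.0)[:, None] + 10.0
k_ar = 2
X = lagmat(endog, maxlag=k_ar, trim='both')
X = np.column_stack((exog[k_ar:, :], X))   # exogenous columns before the lags
X = add_trend(X, prepend=True, trend='c')
print(X[0])  # [ 1. 12.  1.  0.] -> constant, exog, lag 1, lag 2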
a22 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                [[-0.8, 0.0], [0.1, -0.8]]])

a23 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                [[-0.8, 0.2], [0.1, -0.6]]])

a24 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                [[-0.6, 0.0], [0.2, -0.6]],
                [[-0.1, 0.0], [0.1, -0.1]]])

a31 = np.r_[np.eye(3)[None, :, :], 0.8 * np.eye(3)[None, :, :]]
a32 = np.array([[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
                [[0.8, 0.0, 0.0], [0.1, 0.6, 0.0], [0.0, 0.0, 0.9]]])

########

ut = np.random.randn(1000, 2)
ar2s = vargenerate(a22, ut)
# res = np.linalg.lstsq(lagmat(ar2s, 1)[:, 1:], ar2s)
res = np.linalg.lstsq(lagmat(ar2s, 1), ar2s, rcond=None)
bhat = res[0].reshape(1, 2, 2)
arhat = ar2full(bhat)
# print(maxabs(arhat - a22))
v = Var(ar2s)
v.fit(1)
v.forecast()
v.forecast(25)[-30:]

ar23 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                 [[-0.6, 0.0], [0.2, -0.6]],
                 [[-0.1, 0.0], [0.1, -0.1]]])

ma22 = np.array([[[1.0, 0.0], [0.0, 1.0]],
                 [[0.4, 0.0], [0.2, 0.3]]])

ar23ns = np.array([[[1.0, 0.0], [0.0, 1.0]],
                   [[-1.9, 0.0], [0.4, -0.6]],
                   [[0.3, 0.0], [0.1, -0.1]]])
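# The commented-out check above needs a maxabs helper that is not defined in
# this snippet; the definition below is an assumption about what was intended.
def maxabs(x):
    # largest absolute entry, a quick summary of estimation error
    return np.max(np.abs(x))

print(maxabs(arhat - a22))  # should be small if the OLS fit recovered a22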
def acorr_lm(x, maxlag=None, autolag='AIC', store=False):
    '''Lagrange Multiplier tests for autocorrelation

    not checked yet, copied from unitroot_adf with adjustments
    check array shapes because of the addition of the constant.
    written/copied without reference
    This is not Breusch-Godfrey.  BG adds lags of the residual to the exog in
    the design matrix for the auxiliary regression with residuals as endog,
    see Greene 12.7.1.

    Notes
    -----
    If x is calculated as y^2 for a time series y, then this test corresponds
    to the Engle test for autoregressive conditional heteroscedasticity
    (ARCH).
    TODO: get details and verify
    '''
    x = np.asarray(x)
    nobs = x.shape[0]
    if maxlag is None:
        # for adf from Greene referencing Schwert 1989
        maxlag = int(np.ceil(12. * np.power(nobs / 100., 1 / 4.)))
        # TODO: check default, or do AIC/BIC

    xdiff = np.diff(x)  # not used below

    xdall = lagmat(x[:-1, None], maxlag, trim='both')
    nobs = xdall.shape[0]
    xdall = np.c_[np.ones((nobs, 1)), xdall]
    xshort = x[-nobs:]

    if store:
        resstore = ResultsStore()

    if autolag:
        # search for lag length with lowest information criterion
        # Note: use the same number of observations to have comparable IC
        results = {}
        for mlag in range(1, maxlag + 1):
            results[mlag] = sm.OLS(xshort, xdall[:, :mlag + 1]).fit()

        if autolag.lower() == 'aic':
            icbest, icbestlag = min((v.aic, k) for k, v in results.items())
        elif autolag.lower() == 'bic':
            icbest, icbestlag = min((v.bic, k) for k, v in results.items())
        else:
            raise ValueError("autolag can only be None, 'AIC' or 'BIC'")

        # rerun ols with best ic
        xdall = lagmat(x[:, None], icbestlag, trim='forward')
        nobs = xdall.shape[0]
        xdall = np.c_[np.ones((nobs, 1)), xdall]
        xshort = x[-nobs:]
        usedlag = icbestlag
    else:
        usedlag = maxlag

    resols = sm.OLS(xshort, xdall[:, :usedlag + 1]).fit()
    fval = resols.fvalue
    fpval = resols.f_pvalue
    lm = nobs * resols.rsquared
    lmpval = stats.chi2.sf(lm, usedlag)
    # Note: degrees of freedom for LM test is nvars minus constant = usedlags
    if store:
        resstore.resols = resols
        resstore.usedlag = usedlag
        return fval, fpval, lm, lmpval, resstore
    else:
        return fval, fpval, lm, lmpval
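# As the Notes say, applying the test to a squared series gives an Engle ARCH
# test.  A minimal usage sketch on hypothetical data (white noise has no ARCH
# effects, so the test should not reject); assumes the imports acorr_lm needs
# (numpy as np, scipy.stats as stats, statsmodels.api as sm, lagmat) are in
# scope.
import numpy as np

rng = np.random.default_rng(42)
y = rng.standard_normal(500)
fval, fpval, lm, lmpval = acorr_lm(y**2, maxlag=4, autolag=None)
print(lm, lmpval)  # expect a large p-value under the null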