コード例 #1
0
ファイル: stattools.py プロジェクト: Inoryy/statsmodels
def kpss(x, regression='c', lags=None, store=False):
    """
    Kwiatkowski-Phillips-Schmidt-Shin test for stationarity.

    Computes the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test for the null
    hypothesis that x is level or trend stationary.

    Parameters
    ----------
    x : array_like, 1d
        Data series
    regression : str{'c', 'ct'}
        Indicates the null hypothesis for the KPSS test (matched
        case-insensitively)
        * 'c' : The data is stationary around a constant (default)
        * 'ct' : The data is stationary around a trend
    lags : int
        Indicates the number of lags to be used. If None (default),
        lags is set to int(ceil(12 * (n / 100)**(1 / 4))), a rule of
        thumb following Schwert (1989). Note that the value is rounded
        up, not truncated.
    store : bool
        If True, then a result instance is returned additionally to
        the KPSS statistic (default is False).

    Returns
    -------
    kpss_stat : float
        The KPSS test statistic
    p_value : float
        The p-value of the test. The p-value is interpolated from
        Table 1 in Kwiatkowski et al. (1992), and a boundary point
        is returned if the test statistic is outside the table of
        critical values, that is, if the p-value is outside the
        interval (0.01, 0.1).
    lags : int
        The truncation lag parameter. Only part of the returned tuple
        when `store` is False; with `store=True` it is available as
        ``resstore.lags`` instead.
    crit : dict
        The critical values at 10%, 5%, 2.5% and 1%. Based on
        Kwiatkowski et al. (1992).
    resstore : (optional) instance of ResultStore
        Only returned when `store` is True. A dummy object carrying the
        attributes ``lags``, ``nobs``, ``H0`` and ``HA``.

    Raises
    ------
    ValueError
        If `x` has more elements than ``len(x)`` (i.e. it is not a single
        series) or if `regression` is neither 'c' nor 'ct'.

    Warns
    -----
    InterpolationWarning
        If the interpolated p-value sits on a boundary of the table, so
        the true p-value is at most 0.01 or at least 0.1.

    Notes
    -----
    To estimate sigma^2 the Newey-West estimator is used. If lags is None,
    the truncation lag parameter is set to
    int(ceil(12 * (n / 100) ** (1 / 4))), following Schwert (1989).
    The p-values are interpolated from Table 1 of Kwiatkowski et al. (1992).
    If the computed statistic is outside the table of critical values, then
    a warning message is generated.

    Missing values are not handled.

    References
    ----------
    D. Kwiatkowski, P. C. B. Phillips, P. Schmidt, and Y. Shin (1992): Testing
    the Null Hypothesis of Stationarity against the Alternative of a Unit Root.
    `Journal of Econometrics` 54, 159-178.
    """
    from warnings import warn

    nobs = len(x)
    x = np.asarray(x)
    hypo = regression.lower()

    # A multi-column input has x.size == rows * cols, which differs from
    # len(x); only a single series is accepted.
    if nobs != x.size:
        raise ValueError("x of shape {0} not understood".format(x.shape))

    if hypo == 'ct':
        # p. 162 Kwiatkowski et al. (1992): y_t = beta * t + r_t + e_t,
        # where beta is the trend, r_t a random walk and e_t a stationary
        # error term.
        resids = OLS(x, add_constant(np.arange(1, nobs + 1))).fit().resid
        crit = [0.119, 0.146, 0.176, 0.216]
    elif hypo == 'c':
        # special case of the model above, where beta = 0 (so the null
        # hypothesis is that the data is stationary around r_0).
        resids = x - x.mean()
        crit = [0.347, 0.463, 0.574, 0.739]
    else:
        raise ValueError("hypothesis '{0}' not understood".format(hypo))

    if lags is None:
        # from Kwiatkowski et al. referencing Schwert (1989); rounded up
        lags = int(np.ceil(12. * np.power(nobs / 100., 1 / 4.)))

    # p-values matching the entries of `crit`, in the same order
    pvals = [0.10, 0.05, 0.025, 0.01]

    # eq. 11, p. 165: scaled sum of squared partial sums of the residuals.
    # np.sum keeps the reduction inside NumPy instead of looping in Python.
    eta = np.sum(resids.cumsum()**2) / (nobs**2)
    s_hat = _sigma_est_kpss(resids, nobs, lags)

    kpss_stat = eta / s_hat
    # np.interp clamps to the first/last p-value when the statistic lies
    # outside the tabulated critical values.
    p_value = np.interp(kpss_stat, crit, pvals)

    if p_value == pvals[-1]:
        warn("p-value is smaller than the indicated p-value", InterpolationWarning)
    elif p_value == pvals[0]:
        warn("p-value is greater than the indicated p-value", InterpolationWarning)

    crit_dict = {'10%': crit[0], '5%': crit[1], '2.5%': crit[2], '1%': crit[3]}

    if store:
        rstore = ResultsStore()
        rstore.lags = lags
        rstore.nobs = nobs

        stationary_type = "level" if hypo == 'c' else "trend"
        rstore.H0 = "The series is {0} stationary".format(stationary_type)
        rstore.HA = "The series is not {0} stationary".format(stationary_type)

        # lags travels inside rstore here, not as a separate tuple element
        return kpss_stat, p_value, crit_dict, rstore
    else:
        return kpss_stat, p_value, lags, crit_dict
コード例 #2
0
def kpss(x, regression='c', lags=None, store=False):
    """
    Kwiatkowski-Phillips-Schmidt-Shin test for stationarity.

    Computes the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test for the null
    hypothesis that x is level or trend stationary.

    Parameters
    ----------
    x : array_like, 1d
        Data series
    regression : str{'c', 'ct'}
        Indicates the null hypothesis for the KPSS test (matched
        case-insensitively)
        * 'c' : The data is stationary around a constant (default)
        * 'ct' : The data is stationary around a trend
    lags : int
        Indicates the number of lags to be used. If None (default),
        lags is set to int(ceil(12 * (n / 100)**(1 / 4))), a rule of
        thumb following Schwert (1989). Note that the value is rounded
        up, not truncated.
    store : bool
        If True, then a result instance is returned additionally to
        the KPSS statistic (default is False).

    Returns
    -------
    kpss_stat : float
        The KPSS test statistic
    p_value : float
        The p-value of the test. The p-value is interpolated from
        Table 1 in Kwiatkowski et al. (1992), and a boundary point
        is returned if the test statistic is outside the table of
        critical values, that is, if the p-value is outside the
        interval (0.01, 0.1).
    lags : int
        The truncation lag parameter. Only part of the returned tuple
        when `store` is False; with `store=True` it is available as
        ``resstore.lags`` instead.
    crit : dict
        The critical values at 10%, 5%, 2.5% and 1%. Based on
        Kwiatkowski et al. (1992).
    resstore : (optional) instance of ResultStore
        Only returned when `store` is True. A dummy object carrying the
        attributes ``lags``, ``nobs``, ``H0`` and ``HA``.

    Raises
    ------
    ValueError
        If `x` has more elements than ``len(x)`` (i.e. it is not a single
        series) or if `regression` is neither 'c' nor 'ct'.

    Warns
    -----
    InterpolationWarning
        If the interpolated p-value sits on a boundary of the table, so
        the true p-value is at most 0.01 or at least 0.1.

    Notes
    -----
    To estimate sigma^2 the Newey-West estimator is used. If lags is None,
    the truncation lag parameter is set to
    int(ceil(12 * (n / 100) ** (1 / 4))), following Schwert (1989).
    The p-values are interpolated from Table 1 of Kwiatkowski et al. (1992).
    If the computed statistic is outside the table of critical values, then
    a warning message is generated.

    Missing values are not handled.

    References
    ----------
    D. Kwiatkowski, P. C. B. Phillips, P. Schmidt, and Y. Shin (1992): Testing
    the Null Hypothesis of Stationarity against the Alternative of a Unit Root.
    `Journal of Econometrics` 54, 159-178.
    """
    from warnings import warn

    nobs = len(x)
    x = np.asarray(x)
    hypo = regression.lower()

    # A multi-column input has x.size == rows * cols, which differs from
    # len(x); only a single series is accepted.
    if nobs != x.size:
        raise ValueError("x of shape {0} not understood".format(x.shape))

    if hypo == 'ct':
        # p. 162 Kwiatkowski et al. (1992): y_t = beta * t + r_t + e_t,
        # where beta is the trend, r_t a random walk and e_t a stationary
        # error term.
        resids = OLS(x, add_constant(np.arange(1, nobs + 1))).fit().resid
        crit = [0.119, 0.146, 0.176, 0.216]
    elif hypo == 'c':
        # special case of the model above, where beta = 0 (so the null
        # hypothesis is that the data is stationary around r_0).
        resids = x - x.mean()
        crit = [0.347, 0.463, 0.574, 0.739]
    else:
        raise ValueError("hypothesis '{0}' not understood".format(hypo))

    if lags is None:
        # from Kwiatkowski et al. referencing Schwert (1989); rounded up
        lags = int(np.ceil(12. * np.power(nobs / 100., 1 / 4.)))

    # p-values matching the entries of `crit`, in the same order
    pvals = [0.10, 0.05, 0.025, 0.01]

    # eq. 11, p. 165: scaled sum of squared partial sums of the residuals.
    # np.sum keeps the reduction inside NumPy instead of looping in Python.
    eta = np.sum(resids.cumsum()**2) / (nobs**2)
    s_hat = _sigma_est_kpss(resids, nobs, lags)

    kpss_stat = eta / s_hat
    # np.interp clamps to the first/last p-value when the statistic lies
    # outside the tabulated critical values.
    p_value = np.interp(kpss_stat, crit, pvals)

    if p_value == pvals[-1]:
        warn("p-value is smaller than the indicated p-value", InterpolationWarning)
    elif p_value == pvals[0]:
        warn("p-value is greater than the indicated p-value", InterpolationWarning)

    crit_dict = {'10%': crit[0], '5%': crit[1], '2.5%': crit[2], '1%': crit[3]}

    if store:
        rstore = ResultsStore()
        rstore.lags = lags
        rstore.nobs = nobs

        stationary_type = "level" if hypo == 'c' else "trend"
        rstore.H0 = "The series is {0} stationary".format(stationary_type)
        rstore.HA = "The series is not {0} stationary".format(stationary_type)

        # lags travels inside rstore here, not as a separate tuple element
        return kpss_stat, p_value, crit_dict, rstore
    else:
        return kpss_stat, p_value, lags, crit_dict
コード例 #3
0
    def run(self, x, arlags=1, regression='c', method='mle', varest='var94'):
        """
        Leybourne-McCabe stationarity test

        The Leybourne-McCabe test can be used to test for stationarity in a
        univariate process.

        Parameters
        ----------
        x : array_like
            data series; a 1d array or a 2d array with a single column
        arlags : int or None
            number of autoregressive terms to include, default=1. Pass None
            to have the order chosen by the autolag procedure. Must lie in
            the range [1, len(x) // 2].
        regression : {'c','ct'}
            Constant and trend order to include in regression
            * 'c'  : constant only (default)
            * 'ct' : constant and trend
        method : {'mle','ols'}
            Method used to estimate ARIMA(p, 1, 1) filter model
            * 'mle' : condition sum of squares maximum likelihood (default)
            * 'ols' : two-stage least squares
        varest : {'var94','var99'}
            Method used for residual variance estimation
            * 'var94' : method used in original Leybourne-McCabe paper (1994)
                        (default)
            * 'var99' : method used in follow-up paper (1999)

        Returns
        -------
        lmstat : float
            test statistic
        pvalue : float
            based on MC-derived critical values
        arlags : int
            AR(p) order used to create the filtered series
        cvdict : dict
            critical values for the test statistic at the 1%, 5%, and 10%
            levels

        Raises
        ------
        ValueError
            If `regression`, `method` or `varest` is not one of the listed
            options, if `x` is not a single series, or if `arlags` is not an
            integer in the range [1, len(x) // 2].

        Notes
        -----
        H0 = series is stationary

        Basic process is to create a filtered series which removes the AR(p)
        effects from the series under test followed by an auxiliary regression
        similar to that of Kwiatkowski et al (1992). The AR(p) coefficients
        are obtained by estimating an ARIMA(p, 1, 1) model. Two methods are
        provided for ARIMA estimation: MLE and two-stage least squares.

        Two methods are provided for residual variance estimation used in the
        calculation of the test statistic. The first method ('var94') is the
        mean of the squared residuals from the filtered regression. The second
        method ('var99') is the MA(1) coefficient times the mean of the squared
        residuals from the ARIMA(p, 1, 1) filtering model.

        An empirical autolag procedure is provided. In this context, the number
        of lags is equal to the number of AR(p) terms used in the filtering
        step. The number of AR(p) terms is set equal to the first PACF
        falling within the 95% confidence interval. Maximum number of AR lags
        is limited to 1/2 series length.

        References
        ----------
        Kwiatkowski, D., Phillips, P.C.B., Schmidt, P. & Shin, Y. (1992).
        Testing the null hypothesis of stationarity against the alternative of
        a unit root. Journal of Econometrics, 54: 159–178.

        Leybourne, S.J., & McCabe, B.P.M. (1994). A consistent test for a
        unit root. Journal of Business and Economic Statistics, 12: 157–166.

        Leybourne, S.J., & McCabe, B.P.M. (1999). Modified stationarity tests
        with data-dependent model-selection rules. Journal of Business and
        Economic Statistics, 17: 264-270.

        Schwert, G W. (1987). Effects of model specification on tests for unit
        roots in macroeconomic data. Journal of Monetary Economics, 20: 73–103.
        """
        if regression not in ['c', 'ct']:
            raise ValueError('LM: regression option \'%s\' not understood' %
                             regression)
        if method not in ['mle', 'ols']:
            raise ValueError('LM: method option \'%s\' not understood' %
                             method)
        if varest not in ['var94', 'var99']:
            raise ValueError('LM: varest option \'%s\' not understood' %
                             varest)
        x = np.asarray(x)
        if x.ndim > 2 or (x.ndim == 2 and x.shape[1] != 1):
            raise ValueError(
                'LM: x must be a 1d array or a 2d array with a single column')
        # the estimation helpers below receive the series as a column vector
        x = np.reshape(x, (-1, 1))
        # determine AR order if not specified
        max_arlags = len(x) // 2
        if arlags is None:
            arlags = self._autolag(x)
        elif (not isinstance(arlags, (int, np.integer))
              or arlags < 1 or arlags > max_arlags):
            raise ValueError('LM: arlags must be an integer in range [1..%s]' %
                             str(max_arlags))
        else:
            # normalise NumPy integer scalars to a plain Python int
            arlags = int(arlags)
        # estimate the reduced ARIMA(p, 1, 1) model
        if method == 'mle':
            arfit = ARIMA(x, order=(arlags, 1, 1), trend=regression).fit()
            resids = arfit.resid
            arcoeffs = arfit.arparams
            theta = arfit.maparams[0]
        else:
            arcoeffs, theta, resids = self._tsls_arima(x,
                                                       arlags,
                                                       model=regression)
        # variance estimator from (1999) LM paper
        var99 = abs(theta * np.sum(resids**2) / len(resids))
        # create the filtered series:
        #   z(t) = x(t) - arcoeffs[0]*x(t-1) - ... - arcoeffs[p-1]*x(t-p)
        # Work on the 1-D column so every element is a true scalar; writing
        # size-1 arrays into scalar slots is deprecated in recent NumPy.
        series = x[:, 0]
        target = np.arange(arlags, len(series))
        z = series[target].astype(float)
        # One vectorised update per lag, applied in the same order as the
        # element-wise recursion it replaces.
        for lag, coef in enumerate(arcoeffs, start=1):
            z -= coef * series[target - lag]
        # regress the filtered series against a constant and
        # trend term (if requested)
        if regression == 'c':
            resids = z - z.mean()
        else:
            resids = OLS(z, add_constant(np.arange(1, len(z) + 1))).fit().resid
        # variance estimator from (1994) LM paper
        var94 = np.sum(resids**2) / len(resids)
        # compute test statistic with specified variance estimator
        eta = np.sum(resids.cumsum()**2) / (len(resids)**2)
        if varest == 'var99':
            lmstat = eta / var99
        else:
            lmstat = eta / var94
        # calculate pval
        crit = self.__leybourne_crit(lmstat, regression)
        lmpval = crit[0]
        cvdict = crit[1]
        return lmstat, lmpval, arlags, cvdict