Example #1
0
    def test_series_both(self):
        """Check ``lagmat`` on a Series with ``trim="both"`` and pandas output.

        The reference frame holds the series plus its first three shifts,
        with the first three rows removed.  Each ``original`` mode is then
        compared against the matching column slice of that frame.
        """
        max_lag = 3
        lag_names = ["cpi.L." + str(k) for k in range(1, max_lag + 1)]

        expected = pd.DataFrame(
            index=self.series.index,
            columns=["cpi"] + lag_names,
        )
        expected["cpi"] = self.series
        for k, name in enumerate(lag_names, start=1):
            expected[name] = self.series.shift(k)
        expected = expected.iloc[max_lag:]

        def lagged(original):
            # Only the ``original`` mode differs between the three calls.
            return stattools.lagmat(self.series,
                                    max_lag,
                                    trim="both",
                                    original=original,
                                    use_pandas=True)

        # original="in": the series itself is the first column
        both = lagged("in")
        assert_frame_equal(both, expected)

        # original="ex": lag columns only
        lags = lagged("ex")
        assert_frame_equal(lags, expected.iloc[:, 1:])

        # original="sep": lags and the original returned separately
        lags, lead = lagged("sep")
        assert_frame_equal(lead, expected.iloc[:, :1])
        assert_frame_equal(lags, expected.iloc[:, 1:])
Example #2
0
 def test_dataframe_forward(self):
     """Check ``lagmat`` on a DataFrame with ``trim="forward"``.

     The expected values are built by hand: the data occupy the first four
     columns and each lag ``k`` is the same data moved down ``k`` rows
     (zero-filled at the top), truncated to the original number of rows.
     """
     frame = self.macro_df
     nobs = frame.shape[0]
     names = list(frame.columns)

     block = np.zeros((nobs + 3, 16))
     block[:nobs, :4] = frame.values
     for k in (1, 2, 3):
         names += [name + ".L." + str(k) for name in frame]
         left = 4 * k
         block[k:nobs + k, left:left + 4] = frame.values
     expected = pd.DataFrame(block[:nobs], columns=names, index=frame.index)

     def lagged(original):
         # Only the ``original`` mode differs between the three calls.
         return stattools.lagmat(self.macro_df,
                                 3,
                                 trim="forward",
                                 original=original,
                                 use_pandas=True)

     both = lagged("in")
     assert_frame_equal(both, expected)

     lags = lagged("ex")
     assert_frame_equal(lags, expected.iloc[:, 4:])

     lags, lead = lagged("sep")
     assert_frame_equal(lags, expected.iloc[:, 4:])
     assert_frame_equal(lead, expected.iloc[:, :4])
Example #3
0
    def __init__(self, y, exog=None, pmax=12, regression='c', ic='AIC'):
        """
        Store the test settings and assemble the regression data.

        Parameters
        ----------
        y : array or dataframe
            endogenous/response variable.  The code reads ``y.name`` and
            calls ``y.diff()``, so a pandas Series is presumably expected
            (TODO confirm against callers).
        exog : optional
            Extra regressors.  When given, the first row is dropped and the
            remainder is concatenated column-wise to the design matrix.
        pmax : int
            Maximum lag which is included in test.
        regression : {"c","ct","ctt","nc"}
            Constant and trend order to include in regression.
        ic : {"AIC", "BIC", "t-stat", None}
            Information criteria to use when automatically determining the lag.
        """
        self.pmax, self.regression, self.ic = pmax, regression, ic
        self.name = y.name
        self.symbols = ['α₀', 'ρ₁']

        # First lag of the level; the first row is dropped.
        level_lag = lagmat(y, maxlag=1, use_pandas=True)[1:]
        if exog is not None:
            exog = exog[1:]

        # Lag order: third element of the adfuller result tuple.
        n_lags = adfuller(y, self.pmax, self.regression, self.ic)[2]

        # Lags of the first difference, column names prefixed with 'Δ'.
        diff_lags = lagmat(y.diff()[1:], maxlag=n_lags, use_pandas=True)
        diff_lags = diff_lags.add_prefix('Δ')
        design = pd.concat([level_lag, diff_lags, exog], axis=1)

        # Final endog and exog, trimmed by the selected lag order.
        self.y = y[n_lags + 1:]
        self.X = add_constant(design)[n_lags:]
Example #4
0
    def test_add_lag1d(self):
        """``add_lag`` with ``insert=True`` must equal data stacked on ``lagmat``.

        Checked for the 1-D series and for the same values as a single-column
        2-D array (the latter exercises ``lagmat`` with column input too).
        """
        series = self.random_data
        for sample in (series, series[:, None]):
            lagged = stattools.lagmat(sample, 3, trim="Both")
            wanted = np.column_stack((sample[3:], lagged))
            actual = tools.add_lag(sample, lags=3, insert=True)
            assert_equal(wanted, actual)
Example #5
0
    def test_tstat(self):
        """``_autolag_ols`` and ``_autolag`` must pick the same lag ('t-stat').

        Run once for ``self.inflation`` and once for ``self.y``; the values
        returned by ``_autolag_ols`` must also be scalars.
        """
        for attr in ('inflation', 'y'):
            # Attribute is looked up lazily, just before it is used.
            series = getattr(self, attr)
            exog, endog = lagmat(series, 12, original='sep', trim='both')
            _, sel_lag = _autolag(OLS, endog, exog, 1, 11, 't-stat')
            icbest2, sel_lag2 = _autolag_ols(endog, exog, 0, 12, 't-stat')
            assert np.isscalar(icbest2)
            assert np.isscalar(sel_lag2)
            assert sel_lag == sel_lag2
Example #6
0
    def test_tstat(self):
        """Lag chosen by ``_autolag_ols`` must match ``_autolag`` for "t-stat"."""

        def check(series):
            # Same comparison for every input series.
            exog, endog = lagmat(series, 12, original="sep", trim="both")
            reference_lag = _autolag(OLS, endog, exog, 1, 11, "t-stat")[1]
            best_ic, chosen_lag = _autolag_ols(endog, exog, 0, 12, "t-stat")
            assert np.isscalar(best_ic)
            assert np.isscalar(chosen_lag)
            assert reference_lag == chosen_lag

        check(self.inflation)
        check(self.y)
Example #7
0
    def test_tstat(self):
        """Compare 't-stat' lag selection of ``_autolag_ols`` and ``_autolag``.

        Both ``self.inflation`` and ``self.y`` are checked; the outputs of
        ``_autolag_ols`` must be scalars and the selected lags must agree.
        """
        # Generator: each series is fetched only when its turn comes.
        inputs = (getattr(self, name) for name in ('inflation', 'y'))
        for series in inputs:
            exog, endog = lagmat(series, 12, original='sep', trim='both')
            old_result = _autolag(OLS, endog, exog, 1, 11, 't-stat')
            new_result = _autolag_ols(endog, exog, 0, 12, 't-stat')
            sel_lag = old_result[1]
            icbest2, sel_lag2 = new_result
            assert np.isscalar(icbest2)
            assert np.isscalar(sel_lag2)
            assert sel_lag == sel_lag2
Example #8
0
    def setup(self, endog, model=None, pmax=None, ic=None, exog=None):
        """
        Prepare the regression data for a new endogenous series.

        Parameters
        ----------
        endog : pandas.Series or array_like
            Series under test.  Anything that is not a ``pandas.Series`` is
            wrapped in one named ``'y'``.
        model : {'c', 'ct'}, optional
            Deterministic terms.  When given it replaces ``self.model``;
            otherwise the current ``self.model`` is used.
        pmax : int, optional
            When given, replaces ``self.pmax`` (passed to ``adfuller`` as
            ``maxlag``).
        ic : str, optional
            When given, replaces ``self.ic`` (passed to ``adfuller`` as
            ``autolag``).
        exog : optional
            Extra regressors.  Rows from ``p + 1`` onward are appended
            column-wise to ``self.exog``, where ``p`` is the lag order
            reported by ``adfuller``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``self.model`` is neither ``'c'`` nor ``'ct'``.
        """
        # Erase previous results
        self.results = None

        # Custom instance changes
        if model is not None:
            self.model = model
        if pmax is not None:
            self.pmax = pmax
        if ic is not None:
            self.ic = ic

        if not isinstance(endog, pd.Series):
            self.endog = pd.Series(endog, name='y')
            self.endog_original = pd.Series(endog, name='y')
        else:
            self.endog = endog
            self.endog_original = endog

        # Creating endog and exog
        y1 = pd.DataFrame(self.endog.shift(1)[1:]).add_suffix(
            '.L1')  # creating lags
        dy = self.endog.diff(1)[1:]  # first difference

        # Use the stored model, not the raw argument: the argument is None
        # whenever the caller relies on the instance setting.
        resultsADF = adfuller(self.endog,
                              maxlag=self.pmax,
                              regression=self.model,
                              autolag=self.ic)
        p = resultsADF[2]

        # Defining series in case of possible lags
        if p > 0:
            dyl = pd.DataFrame(
                lagmat(dy, maxlag=p),
                index=dy.index,
                columns=['Diff.L' + str(i) for i in range(1, 1 + p)])[p:]
            X = pd.concat([y1[p:], dyl], axis=1)
        # If the ic decides not to include lags
        else:
            dyl = None
            X = y1[p:]

        # Removing tails
        self.lags = p
        self.extraExog = exog[p + 1:] if exog is not None else None
        self.L1 = y1[p:]
        self.diff = dy[p:]
        self.diffLags = dyl
        self.endog = self.endog[p + 1:]
        if self.model == 'c':
            self.exog = add_constant(X)
        elif self.model == 'ct':
            self.exog = add_trend(add_constant(X), 'ct')
        else:
            raise ValueError('Model type is not recognized: ' +
                             str(self.model))

        # Adding additional exog if present
        self.exog = pd.concat([self.exog, self.extraExog], axis=1)
        return self
Example #9
0
 def test_add_lag_drop_insert(self):
     """``add_lag`` with ``drop=True`` replaces the column by its three lags.

     Expected layout: the first two columns, the lag matrix of column 2,
     then the last column, all starting at row 3.
     """
     raw = self.macro_df.values
     as_float = raw.astype(float)
     trimmed = as_float[3:]
     lagged = stattools.lagmat(as_float[:, 2], 3, trim="Both")
     expected = np.column_stack((trimmed[:, :2], lagged, trimmed[:, -1]))
     actual = tools.add_lag(raw, self.realgdp_loc, 3, drop=True)
     assert_equal(actual, expected)
Example #10
0
 def test_add_lag_noinsert_ndarray(self):
     """With ``insert=False`` the lags of column 2 are appended at the end."""
     as_float = self.macro_df.values.astype(float)
     lagged = stattools.lagmat(as_float[:, 2], 3, trim="Both")
     # All original columns from row 3 on, followed by the lag matrix.
     expected = np.column_stack((as_float[3:, :], lagged))
     actual = tools.add_lag(as_float, 2, 3, insert=False)
     assert_equal(actual, expected)
Example #11
0
def _za_thread(x, regression, start, end, nobs, basecols, baselags, res,
               residx):
    # first-diff y
    dy = np.diff(x, axis=0)[:, 0]
    zastat = bpidx = np.inf
    for bp in range(start, end):
        # reserve exog space
        exog = np.zeros((dy[baselags:].shape[0], basecols + baselags))
        # constant
        exog[:, 0] = 1
        # intercept dummy / trend / trend dummy
        if regression != 't':
            exog[(bp - (baselags + 1)):, 1] = 1
            exog[:, 2] = np.arange(baselags + 2, nobs + 1)
            if regression == 'ct':
                exog[(bp - (baselags + 1)):, 3] = np.arange(1, nobs - bp + 1)
        else:
            exog[:, 1] = np.arange(baselags + 2, nobs + 1)
            exog[(bp - (baselags + 1)):, 2] = np.arange(1, nobs - bp + 1)
        # lagged y
        exog[:, basecols - 1] = x[baselags:(nobs - 1), 0]
        # lagged dy
        exog[:, basecols:] = tsa.lagmat(
            dy, baselags, trim='none')[baselags:exog.shape[0] + baselags]
        stat = lm.OLS(dy[baselags:], exog).fit().tvalues[basecols - 1]
        if stat < zastat:
            zastat = stat
            bpidx = bp - 1
            crit = zacrit.za_crit(zastat, regression)
            pval = crit[0]
            cvdict = crit[1]
    res[residx] = [zastat, pval, cvdict, bpidx]
Example #12
0
    def test_add_lag_1d_drop_struct(self):
        """``add_lag`` with ``drop=True`` on a one-field structured array.

        The result must equal the plain lag matrix of the underlying values.
        """
        values = self.random_data
        record = np.zeros(100, dtype=np.dtype([("variable", float)]))
        record["variable"] = values

        expected = stattools.lagmat(values, 3, trim="Both")
        actual = tools.add_lag(record, lags=3, drop=True)
        assert_equal(expected, actual)
Example #13
0
    def test_add_lag1d_drop(self):
        """With ``drop=True`` on 1-D data ``add_lag`` returns just the lags.

        ``insert`` must make no difference in that case, so both settings
        are compared against the same lag matrix.
        """
        series = self.random_data
        expected = stattools.lagmat(series, 3, trim="Both")
        for insert in (True, False):
            actual = tools.add_lag(series, lags=3, drop=True, insert=insert)
            assert_equal(expected, actual)
Example #14
0
 def test_add_lag_noinsertatend_ndarray(self):
     """Lagging the last column gives the same layout with or without insert.

     The second call also checks that a negative column number is accepted.
     """
     as_float = self.macro_df.values.astype(float)
     lagged = stattools.lagmat(as_float[:, -1], 3, trim="Both")
     expected = np.column_stack((as_float[3:, :], lagged))

     appended = tools.add_lag(as_float, 3, 3, insert=False)
     assert_equal(appended, expected)

     # inserting after the last column is the same as appending
     inserted = tools.add_lag(as_float, -1, 3, insert=True)
     assert_equal(inserted, expected)
Example #15
0
 def test_add_lag_noinsert_atend(self):
     """Lagging the column at ``self.cpi_loc``: insert and append must agree.

     The expected array is every original column from row 3 on, followed by
     the lag matrix of the last column.
     """
     raw = self.macro_df.values
     as_float = raw.astype(float)
     lagged = stattools.lagmat(as_float[:, -1], 3, trim="Both")
     expected = np.column_stack((as_float[3:, :], lagged))
     # insert=False first, then insert=True, which should give the same result
     for insert in (False, True):
         actual = tools.add_lag(raw, self.cpi_loc, 3, insert=insert)
         assert_equal(actual, expected)
Example #16
0
 def test_tstat_exogenous(self):
     """'t-stat' lag choice of ``_autolag_ols`` with extra leading regressors.

     The reference is computed directly: OLS is fitted on growing column
     subsets and the t-value of the last column is recorded; the largest
     index whose absolute t-value exceeds the critical value must match.
     """
     lags, endog = lagmat(self.z, 12, original='sep', trim='both')
     exog = np.concatenate([self.x[12:], lags], axis=1)
     _, sel_lag = _autolag_ols(endog, exog, 2, 12, 't-stat')

     ncols = exog.shape[1]
     direct = np.zeros(ncols)
     for width in range(3, ncols):
         direct[width] = OLS(endog, exog[:, :width]).fit().tvalues[-1]

     crit = stats.norm.ppf(0.95)
     significant = np.abs(direct[2:]) > crit
     assert np.max(np.argwhere(significant)) == sel_lag
Example #17
0
 def test_tstat_exogenous(self):
     """Check the 't-stat' lag from ``_autolag_ols`` against direct OLS fits."""
     lag_block, endog = lagmat(self.z, 12, original='sep', trim='both')
     exog = np.concatenate([self.x[12:], lag_block], axis=1)
     sel_lag = _autolag_ols(endog, exog, 2, 12, 't-stat')[1]

     # t-value of the last regressor for every design width from 3 columns up;
     # widths below 3 keep their initial zero.
     direct = np.zeros(exog.shape[1])
     for i in range(3, exog.shape[1]):
         fitted = OLS(endog, exog[:, :i]).fit()
         direct[i] = fitted.tvalues[-1]

     threshold = stats.norm.ppf(0.95)
     exceeds = np.argwhere(np.abs(direct[2:]) > threshold)
     assert np.max(exceeds) == sel_lag
Example #18
0
 def test_bic_exogenous(self):
     """'bic' lag choice of ``_autolag_ols`` with extra leading regressors.

     The reference BIC is taken from OLS fits on growing column subsets;
     widths that are never fitted keep an infinite BIC so they cannot win.
     """
     lags, endog = lagmat(self.z, 12, original='sep', trim='both')
     exog = np.concatenate([self.x[12:], lags], axis=1)
     _, sel_lag = _autolag_ols(endog, exog, 2, 12, 'bic')

     ncols = exog.shape[1]
     direct = np.full(ncols, np.inf)
     for width in range(3, ncols):
         direct[width] = OLS(endog, exog[:, :width]).fit().bic
     assert np.argmin(direct[2:]) == sel_lag
Example #19
0
 def test_bic_exogenous(self):
     """Check the 'bic' lag from ``_autolag_ols`` against direct OLS fits."""
     lag_block, endog = lagmat(self.z, 12, original='sep', trim='both')
     exog = np.concatenate([self.x[12:], lag_block], axis=1)
     sel_lag = _autolag_ols(endog, exog, 2, 12, 'bic')[1]

     # Start from +inf everywhere; only fitted widths get a finite BIC.
     direct = np.full(exog.shape[1], np.inf)
     for i in range(3, exog.shape[1]):
         fitted = OLS(endog, exog[:, :i]).fit()
         direct[i] = fitted.bic
     best = np.argmin(direct[2:])
     assert best == sel_lag
Example #20
0
 def test_sep_return(self):
     """``original="sep"`` with ``trim="none"`` returns lags and leads apart.

     The reference has the data in column ``i`` moved down ``i`` rows
     (zero-padded); column 0 is the lead part, columns 1..3 are the lags.
     """
     series = self.random_data
     nobs = series.shape[0]
     lagmat, leads = stattools.lagmat(series, 3, trim="none", original="sep")

     expected = np.zeros((nobs + 3, 4))
     # Rows of the transpose are views on the columns of ``expected``.
     for shift, column in enumerate(expected.T):
         column[shift:shift + nobs] = series

     assert_equal(expected[:, 1:], lagmat)
     assert_equal(expected[:, :1], leads)
Example #21
0
    def test_add_lag1d_struct(self):
        """``add_lag`` on a one-field structured array, three call variants.

        Each variant must equal ``lagmat`` with ``original="in"``: explicit
        column 0 with insert on and off, and the column argument omitted.
        """
        values = self.random_data
        record = np.zeros(100, dtype=np.dtype([("variable", float)]))
        record["variable"] = values

        expected = stattools.lagmat(values, 3, trim="Both", original="in")

        variants = (
            ((record, 0), {"lags": 3, "insert": True}),
            ((record, 0), {"lags": 3, "insert": False}),
            ((record,), {"lags": 3, "insert": True}),
        )
        for args, kwargs in variants:
            actual = tools.add_lag(*args, **kwargs)
            assert_equal(expected, actual)
Example #22
0
    def _lag(self, X, numlags, holdout=None):
        """Build a lag frame for ``X`` together with the trimmed original.

        ``lagmat`` is called with ``trim="both"`` and ``original="sep"``; the
        lags are wrapped in a DataFrame indexed by ``X``'s index from row
        ``numlags`` onward.

        Without ``holdout`` all lag columns are kept and the trimmed original
        is flattened.  With ``holdout`` only the lag columns from
        ``holdout * ncols`` on are kept and the trimmed original is returned
        as produced by ``lagmat``.
        """
        ncols = 1 if len(X.shape) == 1 else X.shape[1]
        ind = X.iloc[numlags:].index

        lags, X_trim = lagmat(X, numlags, trim="both", original="sep")

        if holdout is None:
            return pd.DataFrame(index=ind, data=lags), X_trim.flatten()

        first_kept = holdout * ncols
        return pd.DataFrame(index=ind, data=lags[:, first_kept:]), X_trim
Example #23
0
    def test_dataframe_without_pandas(self):
        """``lagmat`` on a DataFrame must match ``lagmat`` on its ``.values``.

        Checked for three trim/original combinations; for ``original="sep"``
        both returned parts are compared.
        """
        frame = self.macro_df

        def both_ways(trim, original):
            # Same call on the frame and on the underlying ndarray.
            options = {"trim": trim, "original": original}
            from_frame = stattools.lagmat(frame, 3, **options)
            from_array = stattools.lagmat(frame.values, 3, **options)
            return from_frame, from_array

        both, both_np = both_ways("both", "in")
        assert_equal(both, both_np)

        lags, lags_np = both_ways("none", "ex")
        assert_equal(lags, lags_np)

        (lags, lead), (lags_np, lead_np) = both_ways("forward", "sep")
        assert_equal(lags, lags_np)
        assert_equal(lead, lead_np)
# make a single legend at the top centre of the figure, with the 3 entries side by side
fig.legend(handles = h, labels = l, loc=9, ncol=3)

# tidy up the layout and save the figure
fig.tight_layout()
fig.savefig('pca-decomposition.png')    

#%%
nlags = 4


def _fit_robust_ols(endog, regressors):
    """Fit OLS of ``endog`` on a constant plus ``regressors`` (rows with
    missing values dropped) and return the robust-covariance results."""
    model = sm.OLS(endog, sm.add_constant(regressors), missing='drop')
    return model.fit().get_robustcov_results()


# Collect all regressions (with robust errors) in a dictionary.  Read them
# back as: regression = estims['stability measure name']['regression type']
_base_regressors = {'direct': sep, 'IV': IV, 'rem': rem}
# Lag column names follow nlags, so changing nlags no longer breaks the
# DataFrame construction below.
_lag_names = ['lag_{}'.format(i) for i in range(1, nlags + 1)]
estims = {}
for col in dfz.columns:
    # Own lags of the dependent variable, built once per column.
    _own_lags = pd.DataFrame(lagmat(dfz[col], maxlag=nlags),
                             columns=_lag_names, index=dfz.index)
    _fits = {_spec: _fit_robust_ols(dfz[col], _regs)
             for _spec, _regs in _base_regressors.items()}
    # Same specifications augmented with the own lags; the key order matches
    # the original dictionary.
    for _spec in ('IV', 'rem', 'direct'):
        _fits['VARX-' + _spec] = _fit_robust_ols(
            dfz[col], _base_regressors[_spec].join(_own_lags))
    estims[col] = _fits
# run the logit regressions
logitregressie = {}
logitregressie['NBER recessie'] = {'logit no lag': sm.Logit(dfz2['NBER_RECESSIONS'], sm.add_constant(sep), missing='drop').fit()}
# reuse the stored fit instead of estimating the same model a second time
logitregressie['NBER recessie']['logit no lag'].summary()
nlaglogit = 1
# NOTE(review): the lag frame is indexed with dfz.index although the data
# come from dfz2 -- presumably both share the same index; confirm.
_logit_lags = pd.DataFrame(lagmat(dfz2['NBER_RECESSIONS'], maxlag=nlaglogit),
                           columns=['lag_{}'.format(i) for i in range(1, nlaglogit + 1)],
                           index=dfz.index)
# add to the existing entry; assigning a new dict here would discard the
# 'logit no lag' result stored above
logitregressie['NBER recessie']['logit lag'] = sm.Logit(dfz2['NBER_RECESSIONS'], sm.add_constant(sep.join(_logit_lags)), missing='drop').fit()
logitregressie['NBER recessie']['logit lag'].summary()


#%% run joint significance tests on the lags and IV/resid, with/without lags

# initialise the dictionary in which the test results will be stored:
# one empty dict per column of dfz
tests = {col: {} for col in dfz.columns}
Example #25
0
    def run(self, x, trim=0.15, maxlag=None, regression='c', autolag='AIC'):
        """
        Zivot-Andrews structural-break unit-root test
        The Zivot-Andrews test can be used to test for a unit root in a
        univariate process in the presence of serial correlation and a
        single structural break.

        Parameters
        ----------
        x : array_like
            data series (1d, or 2d with a single column)
        trim : float
            percentage of series at begin/end to exclude from break-period
            calculation in range [0, 0.333] (default=0.15)
        maxlag : int
            maximum lag which is included in test, default=12*(nobs/100)^{1/4}
            (Schwert, 1989)
        regression : {'c','t','ct'}
            Constant and trend order to include in regression
            * 'c' : constant only (default)
            * 't' : trend only
            * 'ct' : constant and trend
        autolag : {'AIC', 'BIC', 't-stat', None}
            * if None, then maxlag lags are used
            * if 'AIC' (default) or 'BIC', then the number of lags is chosen
              to minimize the corresponding information criterion
            * 't-stat' based choice of maxlag.  Starts with maxlag and drops a
              lag until the t-statistic on the last lag length is significant
              using a 5%-sized test

        Returns
        -------
        zastat : float
            test statistic
        pvalue : float
            based on MC-derived critical values
        cvdict : dict
            critical values for the test statistic at the 1%, 5%, and 10%
            levels
        baselag : int
            number of lags used for period regressions
        bpidx : int
            index of x corresponding to endogenously calculated break period
            with values in the range [0..nobs-1]

        Raises
        ------
        ValueError
            If ``regression`` or ``trim`` is invalid, if ``x`` is not 1d or a
            single-column 2d array, or if the auxiliary regressor matrix is
            not full rank on the first break period.

        Notes
        -----
        H0 = unit root with a single structural break

        Algorithm follows Baum (2004/2015) approximation to original
        Zivot-Andrews method. Rather than performing an autolag regression at
        each candidate break period (as per the original paper), a single
        autolag regression is run up-front on the base model (constant + trend
        with no dummies) to determine the best lag length. This lag length is
        then used for all subsequent break-period regressions. This results in
        significant run time reduction but also slightly more pessimistic test
        statistics than the original Zivot-Andrews method, although no attempt
        has been made to characterize the size/power tradeoff.

        References
        ----------
        Baum, C.F. (2004). ZANDREWS: Stata module to calculate Zivot-Andrews
        unit root test in presence of structural break," Statistical Software
        Components S437301, Boston College Department of Economics, revised
        2015.

        Schwert, G.W. (1989). Tests for unit roots: A Monte Carlo
        investigation. Journal of Business & Economic Statistics, 7: 147-159.

        Zivot, E., and Andrews, D.W.K. (1992). Further evidence on the great
        crash, the oil-price shock, and the unit-root hypothesis. Journal of
        Business & Economic Statistics, 10: 251-270.
        """
        if regression not in ['c', 't', 'ct']:
            raise ValueError('ZA: regression option \'%s\' not understood' %
                             regression)
        if not isinstance(trim, float) or trim < 0 or trim > (1. / 3.):
            raise ValueError(
                'ZA: trim value must be a float in range [0, 0.333]')
        x = np.asarray(x)
        if x.ndim > 2 or (x.ndim == 2 and x.shape[1] != 1):
            raise ValueError(
                'ZA: x must be a 1d array or a 2d array with a single column')
        x = np.reshape(x, (-1, 1))
        nobs = x.shape[0]
        if autolag:
            baselags = tsa.adfuller(x[:, 0],
                                    maxlag=maxlag,
                                    regression='ct',
                                    autolag=autolag)[2]
        elif maxlag:
            baselags = maxlag
        else:
            baselags = int(12. * np.power(nobs / 100., 1 / 4.))
        trimcnt = int(nobs * trim)
        start_period = trimcnt
        end_period = nobs - trimcnt
        if regression == 'ct':
            basecols = 5
        else:
            basecols = 4
        # first-diff y and standardize for numerical stability
        dy = np.diff(x, axis=0)[:, 0]
        # Out-of-place division: an in-place ``/=`` cannot store the float
        # result back into an integer array and raises for integer input.
        dy = dy / np.sqrt(dy.T.dot(dy))
        x = x / np.sqrt(x.T.dot(x))
        # reserve exog space
        exog = np.zeros((dy[baselags:].shape[0], basecols + baselags))
        # normalize constant for stability in long time series
        c_const = 1 / np.sqrt(nobs)  # Normalize
        exog[:, 0] = c_const
        # lagged y and dy
        exog[:, basecols - 1] = x[baselags:(nobs - 1), 0]
        exog[:, basecols:] = tsa.lagmat(
            dy, baselags, trim='none')[baselags:exog.shape[0] + baselags]
        # better time trend: t_const @ t_const = 1 for large nobs
        t_const = np.arange(1.0, nobs + 2)
        t_const *= np.sqrt(3) / nobs**(3 / 2)
        # iterate through the time periods; entries that are never visited
        # stay at +inf so they cannot be selected as the minimum
        stats = np.full(end_period + 1, np.inf)
        for bp in range(start_period + 1, end_period + 1):
            # update intercept dummy / trend / trend dummy
            cutoff = (bp - (baselags + 1))
            if regression != 't':
                exog[:cutoff, 1] = 0
                exog[cutoff:, 1] = c_const
                exog[:, 2] = t_const[(baselags + 2):(nobs + 1)]
                if regression == 'ct':
                    exog[:cutoff, 3] = 0
                    exog[cutoff:, 3] = t_const[1:(nobs - bp + 1)]
            else:
                exog[:, 1] = t_const[(baselags + 2):(nobs + 1)]
                exog[:(cutoff - 1), 2] = 0
                exog[(cutoff - 1):, 2] = t_const[0:(nobs - bp + 1)]
            # check exog rank on first iteration
            if bp == start_period + 1:
                o = lm.OLS(dy[baselags:], exog, hasconst=1).fit()
                if o.df_model < exog.shape[1] - 1:
                    raise ValueError(
                        'ZA: auxiliary exog matrix is not full rank.\n \
                        cols (exc intercept) = {}  rank = {}'.format(
                            exog.shape[1] - 1, o.df_model))
                stats[bp] = o.tvalues[basecols - 1]
            else:
                stats[bp] = self.__quick_ols(dy[baselags:], exog)[basecols - 1]
        # return best seen
        zastat = np.min(stats)
        bpidx = np.argmin(stats) - 1
        crit = self.__za_crit(zastat, regression)
        pval = crit[0]
        cvdict = crit[1]
        return zastat, pval, cvdict, baselags, bpidx
Example #26
0
def za(x, trim=0.15, maxlag=None, regression='c', autolag='AIC'):
    """
    Zivot-Andrews structural-break unit-root test

    The Zivot-Andrews test can be used to test for a unit root in a
    univariate process in the presence of serial correlation and a
    single structural break.

    Parameters
    ----------
    x : array_like
        data series; 1d input is treated as a single column, for 2d input
        the first column is used
    trim : float
        percentage of series at begin/end to exclude from break-period
        calculation in range [0, 0.333] (default=0.15)
    maxlag : int
        maximum lag which is included in test, default=12*(nobs/100)^{1/4}
        (Schwert, 1989)
    regression : {'c','t','ct'}
        Constant and trend order to include in regression
        * 'c' : constant only (default)
        * 't' : trend only
        * 'ct' : constant and trend
    autolag : {'AIC', 'BIC', 't-stat', None}
        * if None, then maxlag lags are used
        * if 'AIC' (default) or 'BIC', then the number of lags is chosen
          to minimize the corresponding information criterion
        * 't-stat' based choice of maxlag.  Starts with maxlag and drops a
          lag until the t-statistic on the last lag length is significant
          using a 5%-sized test

    Returns
    -------
    zastat : float
        test statistic
    pvalue : float
        based on MC-derived critical values
    cvdict : dict
        critical values for the test statistic at the 1%, 5%, and 10% levels
    baselag : int
        number of lags used for period regressions
    bpidx : int
        index of x corresponding to endogenously calculated break period

    Raises
    ------
    ValueError
        If ``regression`` or ``trim`` is invalid, or if no break period
        produced a usable test statistic.

    Notes
    -----
    H0 = unit root with a single structural break

    Algorithm follows Baum (2004/2015) approximation to original Zivot-Andrews
    method. Rather than performing an autolag regression at each candidate
    break period (as per the original paper), a single autolag regression is
    run up-front on the base model (constant + trend with no dummies) to
    determine the best lag length. This lag length is then used for all
    subsequent break-period regressions. This results in significant run time
    reduction but also slightly more pessimistic test statistics than the
    original Zivot-Andrews method, although no attempt has been made to
    characterize the size/power tradeoff.

    References
    ----------
    Baum, C.F. (2004). ZANDREWS: Stata module to calculate Zivot-Andrews unit
    root test in presence of structural break," Statistical Software Components
    S437301, Boston College Department of Economics, revised 2015.

    Schwert, G.W. (1989). Tests for unit roots: A Monte Carlo investigation.
    Journal of Business & Economic Statistics, 7: 147-159.

    Zivot, E., and Andrews, D.W.K. (1992). Further evidence on the great crash,
    the oil-price shock, and the unit-root hypothesis. Journal of Business &
    Economic Statistics, 10: 251-270.
    """

    if regression not in ['c', 't', 'ct']:
        raise ValueError('ZA: regression option \'%s\' not understood' %
                         regression)
    if not isinstance(trim, float) or trim < 0 or trim > (1. / 3.):
        raise ValueError('ZA: trim value must be a float in range [0, 0.333]')
    x = np.asarray(x)
    if x.ndim == 1:
        # everything below indexes x as a column vector
        x = x[:, None]
    nobs = x.shape[0]
    if autolag:
        baselags = tsa.adfuller(x[:, 0],
                                maxlag=maxlag,
                                regression='ct',
                                autolag=autolag)[2]
    elif maxlag:
        baselags = maxlag
    else:
        baselags = int(12. * np.power(nobs / 100., 1 / 4.))
    trimcnt = int(nobs * trim)
    start_period = trimcnt
    end_period = nobs - trimcnt
    if regression == 'ct':
        basecols = 5
    else:
        basecols = 4
    # first-diff y
    dy = np.diff(x, axis=0)[:, 0]

    # Parts of the regression that do not depend on the break period are
    # computed once instead of once per candidate.
    endog = dy[baselags:]
    nrows = endog.shape[0]
    trend = np.arange(baselags + 2, nobs + 1)
    lagged_y = x[baselags:(nobs - 1), 0]
    lagged_dy = tsa.lagmat(dy, baselags,
                           trim='none')[baselags:nrows + baselags]

    zastat = np.inf
    bpidx = None
    for bp in range(start_period, end_period + 1):
        # fresh exog so the dummy columns start from zero for every bp
        exog = np.zeros((nrows, basecols + baselags))
        # constant
        exog[:, 0] = 1
        # intercept dummy / trend / trend dummy
        if regression != 't':
            exog[(bp - (baselags + 1)):, 1] = 1
            exog[:, 2] = trend
            if regression == 'ct':
                exog[(bp - (baselags + 1)):, 3] = np.arange(1, nobs - bp + 1)
        else:
            exog[:, 1] = trend
            exog[(bp - (baselags + 1)):, 2] = np.arange(1, nobs - bp + 1)
        # lagged y
        exog[:, basecols - 1] = lagged_y
        # lagged dy
        exog[:, basecols:] = lagged_dy
        stat = lm.OLS(endog, exog).fit().tvalues[basecols - 1]
        if stat < zastat:
            zastat = stat
            bpidx = bp - 1
    if bpidx is None:
        # no statistic compared below +inf (e.g. all NaN); fail with a clear
        # message rather than on unbound locals
        raise ValueError(
            'ZA: no valid test statistic could be computed for any break '
            'period')
    # critical values are only needed for the final statistic
    crit = zacrit.za_crit(zastat, regression)
    pval = crit[0]
    cvdict = crit[1]
    return zastat, pval, cvdict, baselags, bpidx