Exemple #1
0
    def test_tstat(self):
        """Check that `_autolag_ols` and `_autolag` pick the same t-stat lag.

        The comparison is run once for ``self.inflation`` and once for
        ``self.y``.  The two selectors are called with different
        ``(startlag, maxlag)`` pairs -- (1, 11) and (0, 12) -- and are
        expected to agree on the selected lag.
        """
        for attr_name in ('inflation', 'y'):
            series = getattr(self, attr_name)
            lags, target = lagmat(series, 12, original='sep', trim='both')
            # Reference answer from the generic, model-based selector.
            _, reference_lag = _autolag(OLS, target, lags, 1, 11, 't-stat')
            # Answer from the OLS-specific selector under test.
            fast_stat, fast_lag = _autolag_ols(target, lags, 0, 12, 't-stat')
            assert np.isscalar(fast_stat)
            assert np.isscalar(fast_lag)
            assert reference_lag == fast_lag
Exemple #2
0
    def test_tstat(self):
        """Compare the t-stat lag from `_autolag_ols` with that of `_autolag`.

        Both ``self.inflation`` and ``self.y`` are checked, in that order.
        """

        def check_series(data):
            # Build the lag matrix once and feed it to both selectors.
            regressors, response = lagmat(data, 12, original='sep',
                                          trim='both')
            _, expected_lag = _autolag(OLS, response, regressors, 1, 11,
                                       't-stat')
            stat, chosen_lag = _autolag_ols(response, regressors, 0, 12,
                                            't-stat')
            # The OLS-specific selector must hand back plain scalars.
            assert np.isscalar(stat)
            assert np.isscalar(chosen_lag)
            assert expected_lag == chosen_lag

        check_series(self.inflation)
        check_series(self.y)
Exemple #3
0
    def test_tstat(self):
        """Verify `_autolag_ols` agrees with `_autolag` under the t-stat rule."""
        # First data set: self.inflation
        infl_lags, infl_lead = lagmat(
            self.inflation, 12, original="sep", trim="both"
        )
        expected = _autolag(OLS, infl_lead, infl_lags, 1, 11, "t-stat")[1]
        result = _autolag_ols(infl_lead, infl_lags, 0, 12, "t-stat")
        assert np.isscalar(result[0])
        assert np.isscalar(result[1])
        assert expected == result[1]

        # Second data set: self.y
        y_lags, y_lead = lagmat(self.y, 12, original="sep", trim="both")
        expected = _autolag(OLS, y_lead, y_lags, 1, 11, "t-stat")[1]
        result = _autolag_ols(y_lead, y_lags, 0, 12, "t-stat")
        assert np.isscalar(result[0])
        assert np.isscalar(result[1])
        assert expected == result[1]
Exemple #4
0
def _df_select_lags(y, trend, max_lags, method):
    """
    Pick the lag length for a Dickey-Fuller style regression.

    Parameters
    ----------
    y : ndarray, (nobs,)
        Series in levels.  Its first difference is the dependent variable.
    trend : str, {'nc','c','ct','ctt'}
        Trend specification.  Anything other than 'nc' is forwarded to
        ``add_trend`` and the resulting columns are prepended.
    max_lags : int or None
        Largest number of lagged differences to consider.  The estimation
        sample is trimmed by this many observations for every candidate.
        When None, ``ceil(12 * (nobs / 100) ** (1 / 4))`` is used.
    method : str, {'AIC','BIC','t-stat'}
        Selection rule handed on to ``_autolag``.

    Returns
    -------
    best_ic : float
        Criterion value reported by ``_autolag`` for the chosen lag
    best_lag : int
        Chosen lag, after removing the offset of the non-lag columns
    all_res : list
        Regression results returned by ``_autolag`` (``regresults=True``)

    Notes
    -----
    See statsmodels.tsa.tsatools._autolag for details.
    """
    nobs = y.shape[0]
    dy = diff(y)

    if max_lags is None:
        max_lags = int(ceil(12.0 * power(nobs / 100.0, 0.25)))

    # Column 0 holds the contemporaneous difference, columns 1.. its lags.
    lag_block = lagmat(dy[:, None], max_lags, trim='both', original='in')
    n_used = lag_block.shape[0]
    # Overwrite column 0 with the lagged level of y.
    lag_block[:, 0] = y[-n_used - 1:-1]
    dependent = dy[-n_used:]

    if trend == 'nc':
        design = lag_block
    else:
        design = add_trend(lag_block, trend, prepend=True)

    # Number of leading columns (trend terms plus the level) that are not
    # lagged differences.
    offset = design.shape[1] - lag_block.shape[1] + 1
    ic_best, best_lag, all_res = _autolag(
        OLS, dependent, design, offset, max_lags, method, regresults=True)
    # _autolag counts the leading non-lag columns as well; remove them to
    # obtain the number of genuine lags.
    best_lag -= offset
    return ic_best, best_lag, all_res
Exemple #5
0
def _df_select_lags(y, trend, max_lags, method):
    """
    Choose the number of lagged differences in a DF-like regression.

    Parameters
    ----------
    y : ndarray, (nobs,)
        Data in levels used for the lag selection
    trend : str, {'nc','c','ct','ctt'}
        Trend order.  'nc' adds no deterministic terms; any other value is
        passed to ``add_trend``.
    max_lags : int or None
        Upper bound on the lag length.  All candidate models are fit on the
        sample shortened by ``max_lags``.  None selects the default
        ``ceil(12 * (nobs / 100) ** (1 / 4))``.
    method : str, {'AIC','BIC','t-stat'}
        Rule used by ``_autolag`` to choose among the candidates

    Returns
    -------
    best_ic : float
        Value of the criterion at the selected lag, as given by ``_autolag``
    best_lag : int
        Selected lag length
    all_res : list
        Results object(s) produced by ``_autolag`` with ``regresults=True``

    Notes
    -----
    See statsmodels.tsa.tsatools._autolag for details.
    """
    nobs = y.shape[0]
    delta = diff(y)

    if max_lags is None:
        default_lags = 12.0 * power(nobs / 100.0, 1.0 / 4.0)
        max_lags = int(ceil(default_lags))

    regressors = lagmat(delta[:, None], max_lags, trim='both',
                        original='in')
    nobs = regressors.shape[0]
    regressors[:, 0] = y[-nobs - 1:-1]  # level of y replaces the lag-0 column
    regressand = delta[-nobs:]

    with_trend = regressors
    if trend != 'nc':
        with_trend = add_trend(regressors, trend, prepend=True)

    # Index of the first column holding a lagged difference: every trend
    # column plus the level column precede it.
    first_lag = with_trend.shape[1] - regressors.shape[1] + 1
    selection = _autolag(OLS, regressand, with_trend, first_lag, max_lags,
                         method, regresults=True)
    ic_best, best_lag, all_res = selection
    # Columns 0..first_lag-1 are not lags, so shift the reported index back.
    best_lag -= first_lag
    return ic_best, best_lag, all_res
Exemple #6
0
def _autolag_ols(endog,
                 exog,
                 startlag,
                 maxlag,
                 method,
                 modargs=(),
                 regresults=False):
    """
    Returns the results for the lag length that maximizes the info criterion.

    Parameters
    ----------
    endog : {ndarray, Series}
        nobs array containing endogenous variable
    exog : {ndarray, DataFrame}
        nobs by (startlag + maxlag) array containing lags and possibly other
        variables
    startlag : int
        The first zero-indexed column to hold a lag.  See Notes.
    maxlag : int
        The highest lag order for lag length selection.
    method : {'aic', 'bic', 't-stat'}
        aic - Akaike Information Criterion
        bic - Bayes Information Criterion
        t-stat - Based on last lag
    regresults : bool, optional
        Flag indicating to return optional return results

    Returns
    -------
    bestlag : int
        The lag length that maximizes the information criterion.

    Notes
    -----
    Does estimation like mod(endog, exog[:,:i], *modargs).fit(*fitargs)
    where i goes from lagstart to lagstart+maxlag+1.  Therefore, lags are
    assumed to be in contiguous columns from low to high lag length with
    the highest lag in the last column.
    """
    method = method.lower()
    if regresults:
        return _autolag(OLS,
                        endog,
                        exog,
                        startlag,
                        maxlag,
                        method,
                        regresults=regresults)

    resid = squeeze(endog.copy())
    x = exog[:, startlag:].copy()
    sigma2 = empty(maxlag + 1)
    tstat = empty(maxlag + 1)
    if len(exog) > 0 and startlag > 0:
        _x = exog[:, :startlag]
        resid -= _x.dot(pinv(_x).dot(resid))
        x -= _x.dot(pinv(_x).dot(x))
    sigma2[0] = (resid**2).mean()
    tstat[0] = inf

    for i in range(maxlag):
        _x = x[:, i:i + 1]
        xpx = _x.T.dot(_x)
        beta = squeeze(_x.T.dot(resid) / xpx)
        resid -= squeeze(beta * _x)
        x[:, i + 1:] -= _x.dot(_x.T.dot(x[:, i + 1:]) / xpx)
        sigma2[i + 1] = (resid**2).mean()
        tstat[i + 1] = beta / sqrt(sigma2[i + 1] / xpx)

    nobs = float(resid.shape[0])
    llf = -nobs / 2.0 * (log(2 * pi) + log(sigma2) + 1)

    if method == 'aic':
        crit = -2 * llf + 2 * arange(float(maxlag + 1))
        icbest, lag = min(zip(crit, arange(maxlag + 1)))
    elif method == 'bic':
        crit = -2 * llf + log(nobs) * arange(float(maxlag + 1))
        icbest, lag = min(zip(crit, arange(maxlag + 1)))
    elif method == 't-stat':
        stop = 1.6448536269514722
        large_tstat = abs(tstat) >= stop
        lag = int(squeeze(max(argwhere(large_tstat))))
        icbest = float(tstat[lag])
    else:
        raise ValueError('Unknown method')

    return icbest, lag
Exemple #7
0
def _autolag_ols(endog, exog, startlag, maxlag, method, modargs=(), regresults=False):
    """
    Returns the results for the lag length that maximizes the info criterion.

    Parameters
    ----------
    endog : array-like
        nobs array containing endogenous variable
    exog : array-like
        nobs by (startlag + maxlag) array containing lags and possibly other
        variables
    startlag : int
        The first zero-indexed column to hold a lag.  See Notes.
    maxlag : int
        The highest lag order for lag length selection.
    method : {'aic', 'bic', 't-stat'}
        aic - Akaike Information Criterion
        bic - Bayes Information Criterion
        t-stat - Based on last lag
    regresults : bool, optional
        Flag indicating to return optional return results

    Returns
    -------
    bestlag : int
        The lag length that maximizes the information criterion.

    Notes
    -----
    Does estimation like mod(endog, exog[:,:i], *modargs).fit(*fitargs)
    where i goes from lagstart to lagstart+maxlag+1.  Therefore, lags are
    assumed to be in contiguous columns from low to high lag length with
    the highest lag in the last column.
    """
    method = method.lower()
    if regresults:
        return _autolag(OLS, endog, exog, startlag, maxlag, method, regresults=regresults)

    resid = squeeze(endog.copy())
    x = exog[:, startlag:].copy()
    sigma2 = empty(maxlag + 1)
    tstat = empty(maxlag + 1)
    if len(exog) > 0 and startlag > 0:
        _x = exog[:, :startlag]
        resid -= _x.dot(pinv(_x).dot(resid))
        x -= _x.dot(pinv(_x).dot(x))
    sigma2[0] = (resid ** 2).mean()
    tstat[0] = inf

    for i in range(maxlag):
        _x = x[:, i:i + 1]
        xpx = _x.T.dot(_x)
        beta = squeeze(_x.T.dot(resid) / xpx)
        resid -= squeeze(beta * _x)
        x[:, i + 1:] -= _x.dot(_x.T.dot(x[:, i + 1:]) / xpx)
        sigma2[i + 1] = (resid ** 2).mean()
        tstat[i + 1] = beta / sqrt(sigma2[i + 1] / xpx)

    nobs = float(resid.shape[0])
    llf = -nobs / 2.0 * (log(2 * pi) + log(sigma2) + 1)

    if method == 'aic':
        crit = -2 * llf + 2 * arange(float(maxlag + 1))
        icbest, lag = min(zip(crit, arange(maxlag + 1)))
    elif method == 'bic':
        crit = -2 * llf + log(nobs) * arange(float(maxlag + 1))
        icbest, lag = min(zip(crit, arange(maxlag + 1)))
    elif method == 't-stat':
        stop = 1.6448536269514722
        large_tstat = abs(tstat) >= stop
        lag = int(squeeze(max(argwhere(large_tstat))))
        icbest = float(tstat[lag])
    else:
        raise ValueError('Unknown method')

    return icbest, lag
Exemple #8
0
def _autolag_ols(endog,
                 exog,
                 startlag,
                 maxlag,
                 method,
                 modargs=(),
                 regresults=False):
    """
    Returns the results for the lag length that maximizes the info criterion.

    Parameters
    ----------
    endog : {ndarray, Series}
        nobs array containing endogenous variable
    exog : {ndarray, DataFrame}
        nobs by (startlag + maxlag) array containing lags and possibly other
        variables
    startlag : int
        The first zero-indexed column to hold a lag.  See Notes.
    maxlag : int
        The highest lag order for lag length selection.
    method : {'aic', 'bic', 't-stat'}
        aic - Akaike Information Criterion
        bic - Bayes Information Criterion
        t-stat - Based on last lag
    regresults : bool, optional
        Flag indicating to return optional return results

    Returns
    -------
    bestlag : int
        The lag length that maximizes the information criterion.

    Notes
    -----
    Does estimation like mod(endog, exog[:,:i], *modargs).fit(*fitargs)
    where i goes from lagstart to lagstart+maxlag+1.  Therefore, lags are
    assumed to be in contiguous columns from low to high lag length with
    the highest lag in the last column.
    """
    method = method.lower()
    if regresults:
        return _autolag(OLS,
                        endog,
                        exog,
                        startlag,
                        maxlag,
                        method,
                        regresults=regresults)

    q, r = qr(exog)
    qpy = q.T.dot(endog)
    ypy = endog.T.dot(endog)
    xpx = exog.T.dot(exog)

    sigma2 = empty(maxlag + 1)
    tstat = empty(maxlag + 1)
    nobs = float(endog.shape[0])
    tstat[0] = inf
    for i in range(startlag, startlag + maxlag + 1):
        b = solve(r[:i, :i], qpy[:i])
        sigma2[i - startlag] = (ypy - b.T.dot(xpx[:i, :i]).dot(b)) / nobs
        if method == 't-stat' and i > startlag:
            xpxi = inv(xpx[:i, :i])
            stderr = sqrt(sigma2[i - startlag] * xpxi[-1, -1])
            tstat[i - startlag] = b[-1] / stderr

    llf = -nobs / 2.0 * (log(2 * pi) + log(sigma2) + 1)

    if method == 'aic':
        crit = -2 * llf + 2 * arange(float(maxlag + 1))
        icbest, lag = min(zip(crit, arange(maxlag + 1)))
    elif method == 'bic':
        crit = -2 * llf + log(nobs) * arange(float(maxlag + 1))
        icbest, lag = min(zip(crit, arange(maxlag + 1)))
    elif method == 't-stat':
        stop = 1.6448536269514722
        large_tstat = abs(tstat) >= stop
        lag = int(squeeze(max(argwhere(large_tstat))))
        icbest = float(tstat[lag])
    else:
        raise ValueError('Unknown method')

    return icbest, lag