Beispiel #1
0
def recursive_filter(x, ar_coeff, init=None):
    """
    Apply an autoregressive (recursive) filter to a series.

    Parameters
    ----------
    x : array_like
        Time-series data. Should be 1d or n x 1.
    ar_coeff : array_like
        AR coefficients in reverse time order. See Notes.
    init : array_like, optional
        Initial values of the time-series prior to the first value of y.
        Must have the same length as ``ar_coeff``. The default is zero.

    Returns
    -------
    y : array
        Filtered array, number of columns determined by x and ar_coeff. If a
        pandas object is given, a pandas object is returned.

    Raises
    ------
    ValueError
        If ``init`` is given and its length differs from ``ar_coeff``.

    Notes
    -----
    Computes the recursive filter ::

        y[n] = ar_coeff[0] * y[n-1] + ...
                + ar_coeff[n_coeff - 1] * y[n - n_coeff] + x[n]

    where n_coeff = len(ar_coeff).
    """
    wrapper = PandasWrapper(x)
    x = array_like(x, 'x')
    ar_coeff = array_like(ar_coeff, 'ar_coeff')

    if init is None:
        # No initial state: lfilter returns only the filtered series.
        filtered = signal.lfilter([1.], np.r_[1, -ar_coeff], x, zi=None)
    else:
        # integer init are treated differently in lfiltic
        init = array_like(init, 'init')
        if len(init) != len(ar_coeff):
            raise ValueError("ar_coeff must be the same length as init")
        state = signal.lfiltic([1], np.r_[1, -ar_coeff], init, x)
        # With an initial state lfilter returns (y, final_state); keep y only.
        filtered, _ = signal.lfilter([1.], np.r_[1, -ar_coeff], x, zi=state)

    return wrapper.wrap(filtered)
Beispiel #2
0
def seasonal_decompose(x, model="additive", filt=None, period=None,
                       two_sided=True, extrapolate_trend=0):
    """
    Decompose a series into trend, seasonal and residual parts using
    moving averages.

    Parameters
    ----------
    x : array_like
        Time series. If 2d, individual series are in columns. x must contain 2
        complete cycles.
    model : {"additive", "multiplicative"}, optional
        Type of seasonal component. Abbreviations are accepted (any value
        starting with "m" selects the multiplicative model).
    filt : array_like, optional
        The filter coefficients for filtering out the seasonal component.
        The concrete moving average method used in filtering is determined by
        two_sided. If omitted, a moving average spanning one period is used;
        for an even period the two end weights are halved.
    period : int, optional
        Period of the series. Must be used if x is not a pandas object or if
        the index of x does not have a frequency. Overrides default
        periodicity of x if x is a pandas object with a timeseries index.
    two_sided : bool, optional
        The moving average method used in filtering.
        If True (default), a centered moving average is computed using the
        filt. If False, the filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is
        linear least-squares extrapolated on both ends (or the single one
        if two_sided is False) considering this many (+1) closest points.
        If set to 'freq', use `period` closest points. Setting this parameter
        results in no NaN values in trend or resid components.

    Returns
    -------
    DecomposeResult
        An object with seasonal, trend, resid and observed attributes.

    Raises
    ------
    ValueError
        If x contains non-finite values, if the multiplicative model is
        requested for data with zero or negative values, if no period is
        given and none can be inferred from ``x.index``, or if x has fewer
        than two complete cycles.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.filters.convolution_filter
    statsmodels.tsa.seasonal.STL

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The trend is first estimated by applying a convolution filter to the
    data and removed from the series. The per-period averages of the
    detrended series, normalized by their mean, are the returned seasonal
    component.
    """
    inferred = period
    wrapper = PandasWrapper(x)
    if period is None:
        # Read the index frequency before x is converted to an ndarray.
        inferred = getattr(getattr(x, 'index', None), 'inferred_freq', None)

    x = array_like(x, 'x', maxdim=2)
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if period is None:
        if inferred is None:
            raise ValueError("You must specify a period or x must be a "
                             "pandas object with a DatetimeIndex with "
                             "a freq not set to None")
        period = freq_to_period(inferred)
    if nobs < 2 * period:
        raise ValueError('x must have 2 complete cycles requires {0} '
                         'observations. x only has {1} '
                         'observation(s)'.format(2 * period, nobs))

    if filt is None:
        if period % 2 == 0:
            # Even period: split the outermost weight across both ends.
            filt = np.array([.5] + [1] * (period - 1) + [.5]) / period
        else:
            filt = np.repeat(1. / period, period)

    trend = convolution_filter(x, filt, int(two_sided) + 1)

    if extrapolate_trend == 'freq':
        extrapolate_trend = period - 1
    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    if multiplicative:
        detrended = x / trend
    else:
        detrended = x - trend

    # Average the detrended values per position in the cycle, then normalize
    # so the seasonal factors average to 1 (multiplicative) or 0 (additive).
    period_averages = seasonal_mean(detrended, period)
    center = np.mean(period_averages, axis=0)
    if multiplicative:
        period_averages /= center
    else:
        period_averages -= center

    # Repeat one cycle often enough to cover the sample, then cut to length.
    seasonal = np.tile(period_averages.T, nobs // period + 1).T[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    components = (seasonal, trend, resid, x)
    labels = ('seasonal', 'trend', 'resid', None)
    wrapped = [wrapper.wrap(part.squeeze(), columns=label)
               for part, label in zip(components, labels)]
    seasonal_out, trend_out, resid_out, observed_out = wrapped
    return DecomposeResult(seasonal=seasonal_out, trend=trend_out,
                           resid=resid_out, observed=observed_out)
Beispiel #3
0
def hpfilter(x, lamb=1600):
    """
    Hodrick-Prescott filter.

    Parameters
    ----------
    x : array_like
        The time series to filter, 1-d.
    lamb : float
        The Hodrick-Prescott smoothing parameter. A value of 1600 is
        suggested for quarterly data. Ravn and Uhlig suggest using a value
        of 6.25 (1600/4**4) for annual data and 129600 (1600*3**4) for monthly
        data.

    Returns
    -------
    cycle : ndarray
        The estimated cycle in the data given lamb.
    trend : ndarray
        The estimated trend in the data given lamb.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
        Baxter-King filter.
    statsmodels.tsa.filters.cf_filter.cffilter
        The Christiano Fitzgerald asymmetric, random walk filter.
    statsmodels.tsa.seasonal.seasonal_decompose
        Decompose a time series using moving averages.
    statsmodels.tsa.seasonal.STL
        Season-Trend decomposition using LOESS.

    Notes
    -----
    The HP filter removes a smooth trend, `T`, from the data `x` by solving

    min sum((x[t] - T[t])**2 + lamb*((T[t+1] - T[t]) - (T[t] - T[t-1]))**2)
     T   t

    Here the HP filter is implemented as a ridge-regression rule using
    scipy.sparse. In this sense, the solution can be written as

    T = inv(I + lamb*K'K)x

    where I is a nobs x nobs identity matrix, and K is a (nobs-2) x nobs
    second-difference matrix such that

    K[i,j] = 1 if j == i or j == i + 2
    K[i,j] = -2 if j == i + 1
    K[i,j] = 0 otherwise

    References
    ----------
    Hodrick, R.J, and E. C. Prescott. 1980. "Postwar U.S. Business Cycles: An
        Empirical Investigation." `Carnegie Mellon University discussion
        paper no. 451`.
    Ravn, M.O and H. Uhlig. 2002. "Notes On Adjusted the Hodrick-Prescott
        Filter for the Frequency of Observations." `The Review of Economics and
        Statistics`, 84(2), 371-80.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> import pandas as pd
    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> index = pd.period_range('1959Q1', '2009Q3', freq='Q')
    >>> dta.set_index(index, inplace=True)

    >>> cycle, trend = sm.tsa.filters.hpfilter(dta.realgdp, 1600)
    >>> gdp_decomp = dta[['realgdp']].copy()
    >>> gdp_decomp["cycle"] = cycle
    >>> gdp_decomp["trend"] = trend

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> gdp_decomp[["realgdp", "trend"]]["2000-03-31":].plot(ax=ax,
    ...                                                      fontsize=16)
    >>> plt.show()

    .. plot:: plots/hpf_plot.py
    """
    wrapper = PandasWrapper(x)
    x = array_like(x, 'x', ndim=1)
    nobs = len(x)

    identity = sparse.eye(nobs, nobs)
    # Second-difference operator: rows hold (1, -2, 1) on the main diagonal
    # and the first two super-diagonals.
    diagonals = np.repeat([[1.], [-2.], [1.]], nobs, axis=1)
    second_diff = sparse.dia_matrix((diagonals, np.array([0, 1, 2])),
                                    shape=(nobs - 2, nobs))

    # Ridge-type normal equations (I + lamb * K'K) T = x.
    system = identity + lamb * second_diff.T.dot(second_diff)
    trend = spsolve(system, x, use_umfpack=True)

    cycle = x - trend
    return (wrapper.wrap(cycle, append='cycle'),
            wrapper.wrap(trend, append='trend'))
Beispiel #4
0
def bkfilter(x, low=6, high=32, K=12):
    """
    Baxter-King bandpass filter.

    Parameters
    ----------
    x : array_like
        A 1 or 2d ndarray. If 2d, variables are assumed to be in columns.
    low : float
        Minimum period for oscillations, i.e., Baxter and King suggest that
        the Burns-Mitchell U.S. business cycle has 6 for quarterly data and
        1.5 for annual data.
    high : float
        Maximum period for oscillations. BK suggest that the U.S.
        business cycle has 32 for quarterly data and 8 for annual data.
    K : int
        Lead-lag length of the filter. Baxter and King propose a truncation
        length of 12 for quarterly data and 3 for annual data.

    Returns
    -------
    c : array
        Cyclical component of x. The convolution uses ``mode='valid'``, so
        K observations are lost at each end of the sample.

    See Also
    --------
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.seasonal.seasonal_decompose

    Notes
    -----
    Returns a centered weighted moving average of the original series. Where
    the weights a[j] are computed ::

      a[j] = b[j] + theta, for j = 0, +/-1, +/-2, ... +/- K
      b[0] = (omega_2 - omega_1)/pi
      b[j] = 1/(pi*j)(sin(omega_2*j)-sin(omega_1*j)), for j = +/-1, +/-2,...

    and theta is a normalizing constant ::

      theta = -sum(b)/(2K+1)

    References
    ----------
    Baxter, M. and R. G. King. "Measuring Business Cycles: Approximate
        Band-Pass Filters for Economic Time Series." *Review of Economics and
        Statistics*, 1999, 81(4), 575-593.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> import pandas as pd
    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> index = pd.period_range('1959Q1', '2009Q3', freq='Q')
    >>> dta.set_index(index, inplace=True)

    >>> cycles = sm.tsa.filters.bkfilter(dta[['realinv']], 6, 24, 12)

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> cycles.plot(ax=ax, style=['r--', 'b-'])
    >>> plt.show()

    .. plot:: plots/bkf_plot.py
    """
    # TODO: change the docstring to ..math::?
    # TODO: allow windowing functions to correct for the Gibbs phenomenon?
    # adjust the weights (symmetrically) by the factors below before
    # demeaning: Lanczos sigma factors np.sinc(2*j/(2.*K+1))
    wrapper = PandasWrapper(x)
    x = array_like(x, 'x', maxdim=2)

    # Band edges as angular frequencies (2*pi / period).
    omega_1 = 2. * np.pi / high
    omega_2 = 2. * np.pi / low

    kernel = np.zeros(2 * K + 1)
    kernel[K] = (omega_2 - omega_1) / np.pi  # center weight, lag 0
    lags = np.arange(1, int(K) + 1)
    side = 1 / (np.pi * lags) * (np.sin(omega_2 * lags)
                                 - np.sin(omega_1 * lags))
    kernel[K + lags] = side  # lags double as index offsets from the center
    kernel[:K] = side[::-1]  # mirror to make the kernel symmetric
    kernel -= kernel.mean()  # force the weights to sum to zero

    if x.ndim == 2:
        # Column kernel so that each column of x is filtered separately.
        kernel = kernel[:, None]

    # Centered moving average; only fully-overlapping positions are kept.
    cycle = fftconvolve(x, kernel, mode='valid')

    return wrapper.wrap(cycle, append='cycle', trim_start=K, trim_end=K)
Beispiel #5
0
def cffilter(x, low=6, high=32, drift=True):
    """
    Christiano Fitzgerald asymmetric, random walk filter.

    Parameters
    ----------
    x : array_like
        1 or 2d array to filter. If 2d, variables are assumed to be in columns.
    low : float
        Minimum period of oscillations. Features below low periodicity are
        filtered out. Default is 6 for quarterly data, giving a 1.5 year
        periodicity. Must be at least 2.
    high : float
        Maximum period of oscillations. Features above high periodicity are
        filtered out. Default is 32 for quarterly data, giving an 8 year
        periodicity.
    drift : bool
        Whether or not to remove a trend from the data. The trend is estimated
        as np.arange(nobs)*(x[-1] - x[0])/(len(x)-1)

    Returns
    -------
    cycle : array
        The features of `x` between periodicities given by low and high
    trend : array
        The trend in the data with the cycles removed. When ``drift`` is
        True this is computed from the drift-adjusted series.

    Raises
    ------
    ValueError
        If ``low`` is smaller than 2.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.seasonal.seasonal_decompose

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> import pandas as pd
    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> index = pd.period_range('1959Q1', '2009Q3', freq='Q')
    >>> dta.set_index(index, inplace=True)

    >>> cf_cycles, cf_trend = sm.tsa.filters.cffilter(dta[["infl", "unemp"]])

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> cf_cycles.plot(ax=ax, style=['r--', 'b-'])
    >>> plt.show()

    .. plot:: plots/cff_plot.py
    """
    # TODO: cythonize/vectorize loop?, add ability for symmetric filter,
    #       and estimates of theta other than random walk.
    if low < 2:
        raise ValueError("low must be >= 2")
    wrapper = PandasWrapper(x)
    x = array_like(x, 'x', ndim=2)
    nobs, nseries = x.shape
    a = 2 * np.pi / high
    b = 2 * np.pi / low

    if drift:  # get drift adjusted series
        x = x - np.arange(nobs)[:, None] * (x[-1] - x[0]) / (nobs - 1)

    # Band-pass weights for lags 0..nobs, stored as a column vector.
    J = np.arange(1, nobs + 1)
    Bj = (np.sin(b * J) - np.sin(a * J)) / (np.pi * J)
    B0 = (b - a) / np.pi
    Bj = np.r_[B0, Bj][:, None]
    center = Bj[0]
    y = np.zeros((nobs, nseries))

    for i in range(nobs):
        lead = Bj[1:-i - 2]  # weights paired with x[i + 1:-1]
        lag = Bj[1:i]        # weights paired with x[1:i], most recent first
        lead_total = np.sum(lead)
        # Weights on the last (B) and first (A) observations.
        B = -.5 * center - lead_total
        A = -center - lead_total - np.sum(lag) - B
        y[i] = (center * x[i] + np.dot(lead.T, x[i + 1:-1]) +
                B * x[-1] + np.dot(lag.T, x[1:i][::-1]) + A * x[0])

    cycle = y.squeeze()
    trend = x.squeeze() - cycle

    return wrapper.wrap(cycle, append='cycle'), wrapper.wrap(trend,
                                                             append='trend')
Beispiel #6
0
def convolution_filter(x, filt, nsides=2):
    """
    Linear filtering via convolution. Centered and backward displaced moving
    weighted average.

    Parameters
    ----------
    x : array_like
        data array, 1d or 2d, if 2d then observations in rows
    filt : array_like
        Linear filter coefficients in reverse time-order. Should have the
        same number of dimensions as x though if 1d and ``x`` is 2d will be
        coerced to 2d.
    nsides : int, optional
        If 2, a centered moving average is computed using the filter
        coefficients. If 1, the filter coefficients are for the current and
        past values only. Both methods use scipy.signal.convolve.

    Returns
    -------
    y : ndarray, 2d
        Filtered array, number of columns determined by x and filt. If a
        pandas object is given, a pandas object is returned. The index of
        the return is the exact same as the time period in ``x``

    Raises
    ------
    ValueError
        If ``nsides`` is neither 1 nor 2.

    Notes
    -----
    In nsides == 1, x is filtered ::

        y[n] = filt[0]*x[n] + ... + filt[n_filt-1]*x[n-n_filt+1]

    where n_filt is len(filt).

    If nsides == 2, x is filtered around lag 0 ::

        y[n] = filt[0]*x[n - n_filt/2] + ... + filt[n_filt / 2] * x[n]
               + ... + x[n + n_filt/2]

    where n_filt is len(filt). If n_filt is even, then more of the filter
    is forward in time than backward.

    If filt is 1d or (nlags,1) one lag polynomial is applied to all
    variables (columns of x). If filt is 2d, (nlags, nvars) each series is
    independently filtered with its own lag polynomial, uses loop over nvar.
    This is different than the usual 2d vs 2d convolution.

    Filtering is done with scipy.signal.convolve, so it will be reasonably
    fast for medium sized data. For large data fft convolution would be
    faster.
    """
    # The convolution is always computed in 'valid' mode; nsides only decides
    # where the missing edge positions are padded back in. Shifting the index
    # this way, instead of using 0 for the 0 lag, allows correct handling of
    # NaNs.
    if nsides == 1:
        trim_head = len(filt) - 1
        trim_tail = None
    elif nsides == 2:
        trim_head = int(np.ceil(len(filt)/2.) - 1) or None
        trim_tail = int(np.ceil(len(filt)/2.) - len(filt) % 2) or None
    else:  # pragma : no cover
        raise ValueError("nsides must be 1 or 2")

    pw = PandasWrapper(x)
    x = array_like(x, 'x', maxdim=2)
    filt = array_like(filt, 'filt', ndim=x.ndim)

    if filt.ndim == 1 or min(filt.shape) == 1:
        # One lag polynomial shared by every column.
        result = signal.convolve(x, filt, mode='valid')
    elif filt.ndim == 2:
        # One lag polynomial per column, filtered independently.
        nlags = filt.shape[0]
        nvar = x.shape[1]
        result = np.zeros((x.shape[0] - nlags + 1, nvar))
        # The same 'valid' convolution serves both nsides values. The
        # previous one-sided variant prepended a zero to the filter, which
        # produced one value fewer than the preallocated column and failed
        # on assignment; it also disagreed with the shared-filter path above
        # and with the trim_head padding.
        for i in range(nvar):
            # could also use np.convolve, but easier for switching to fft
            result[:, i] = signal.convolve(x[:, i], filt[:, i],
                                           mode='valid')
    # NOTE(review): _pad_nans is defined elsewhere; presumably it pads
    # trim_head / trim_tail NaN rows so the output matches len(x) - confirm.
    result = _pad_nans(result, trim_head, trim_tail)
    return pw.wrap(result)
Beispiel #7
0
def hprescott(X, side=2, smooth=1600, freq=''):
    """
    Hodrick-Prescott filter with the option to use either the standard
    two-sided or a one-sided implementation.

    The two-sided implementation is intended to give results equivalent to
    the statsmodels ``tsa.filters.hpfilter`` function.

    Parameters
    ----------
    X : array_like
        The time series to filter (1-d). Must contain at least 4
        observations. Multivariate input is not supported.
    side : int
        The implementation requested, 1 (one-sided) or 2 (two-sided).
        Defaults to the standard two-sided implementation.
    smooth : float
        The Hodrick-Prescott smoothing parameter. A value of 1600 is
        suggested for quarterly data. Ravn and Uhlig suggest using a value
        of 6.25 (1600/4**4) for annual data and 129600 (1600*3**4) for monthly
        data. Defaults to the quarterly value (1600).
    freq : str
        Optional frequency of the data. When given as annual ('a'),
        quarterly ('q') or monthly ('m') it overrides ``smooth`` with the
        value suggested by Ravn and Uhlig. Any other non-empty value prints
        a notice and leaves ``smooth`` unchanged.

    Returns
    -------
    cycle : ndarray
        The estimated cycle in the data given side implementation and the
        smoothing parameter.
    trend : ndarray
        The estimated trend in the data given side implementation and the
        smoothing parameter.

    Raises
    ------
    ValueError
        If ``side`` is not 1 or 2, or if ``X`` has fewer than 4 observations.

    Notes
    -----
    The system matrix is built densely (T x T). The one-sided filter solves
    one growing system per observation and keeps only the last element of
    each solution, so it is considerably more expensive than the two-sided
    filter.

    References
    ----------
    Hodrick, R.J, and E. C. Prescott. 1980. "Postwar U.S. Business Cycles: An
        Empirical Investigation." `Carnegie Mellon University discussion
        paper no. 451`.

    Meyer-Gohde, A. 2010. "Matlab code for one-sided HP-filters."
        `Quantitative Macroeconomics & Real Business Cycles, QM&RBC Codes 181`.

    Ravn, M.O and H. Uhlig. 2002. "Notes On Adjusted the Hodrick-Prescott
        Filter for the Frequency of Observations." `The Review of Economics and
        Statistics`, 84(2), 371-80.

    Examples
    --------
    >>> from statsmodels.api import datasets, tsa
    >>> import pandas as pd
    >>> dta = datasets.macrodata.load_pandas().data
    >>> index = pd.period_range('1959Q1', '2009Q3', freq='Q')
    >>> dta.set_index(index, inplace=True)

    Run the original tsa.filters two-sided HP filter

    >>> cycle_tsa, trend_tsa = tsa.filters.hpfilter(dta.realgdp, 1600)

    Run the two-sided implementation

    >>> cycle2, trend2 = hprescott(dta.realgdp, 2, 1600)

    Run the one-sided implementation

    >>> cycle1, trend1 = hprescott(dta.realgdp, 1, 1600)
    """
    # Reject an unsupported side before any matrix is built.
    if side not in (1, 2):
        raise ValueError('Side Parameter should be 1 or 2')

    # Determine smooth if a specific frequency is given
    if freq == 'q':
        smooth = 1600  # quarterly
    elif freq == 'a':
        smooth = 6.25  # annually
    elif freq == 'm':
        smooth = 129600  # monthly
    elif freq != '':
        print(
            '''Invalid frequency parameter inputted. Defaulting to defined smooth
        parameter value or 1600 if no value was provided.''')

    pw = PandasWrapper(X)
    X = array_like(X, 'X', ndim=1)
    T = len(X)

    # The 2 x 4 boundary blocks below need at least a 4 x 4 system; shorter
    # input used to fail with an opaque broadcasting error.
    if T < 4:
        raise ValueError('X must contain at least 4 observations, '
                         'got {0}'.format(T))

    # Rearranging the first order conditions of the minimization problem
    # yields a banded system: the first and last two rows are mirror images
    # of each other, the middle rows repeat a3 shifted by one column per row.
    a1 = np.array([1 + smooth, -2 * smooth, smooth])
    a2 = np.array([-2 * smooth, 1 + 5 * smooth, -4 * smooth, smooth])
    a3 = np.array([smooth, -4 * smooth, 1 + 6 * smooth, -4 * smooth, smooth])

    Abeg = np.vstack((np.append(a1, 0), a2))
    Aend = Abeg[::-1, ::-1]  # mirror image of the leading block

    Atot = np.zeros((T, T))
    Atot[:2, :4] = Abeg
    Atot[-2:, -4:] = Aend
    for i in range(2, T - 2):
        Atot[i, i - 2:i + 3] = a3

    if side == 1:
        trend = np.zeros(T)
        # The first two observations are taken as their own trend.
        trend[:2] = X[:2]

        # The three-observation problem has its own middle row.
        r3 = np.array([-2 * smooth, 1 + 4 * smooth, -2 * smooth])
        A3 = np.array([a1, r3, a1[::-1]])
        trend[2] = cho_solve(cho_factor(A3), X[:3])[2]

        # From the fourth observation on, solve the two-sided problem on
        # X[:t + 1] and keep only its last trend value. The system is the
        # leading part of Atot with the closing two rows replaced by Aend.
        for t in range(3, T):
            Atmp = np.zeros((t + 1, t + 1))
            Atmp[:t - 1] = Atot[:t - 1, :t + 1]
            Atmp[t - 1:, t - 3:] = Aend
            trend[t] = cho_solve(cho_factor(Atmp), X[:t + 1])[t]
    else:
        # Two-sided: solve the full system A * trend = X once.
        trend = cho_solve(cho_factor(Atot), X)

    cyclical = X - trend

    return pw.wrap(cyclical, append='cyclical'), pw.wrap(trend, append='trend')