def recursive_filter(x, ar_coeff, init=None):
    """
    Apply an autoregressive (recursive) filter to a series.

    Parameters
    ----------
    x : array_like
        Time-series data. Should be 1d or n x 1.
    ar_coeff : array_like
        AR coefficients in reverse time order. See Notes.
    init : array_like, optional
        Initial values of the filtered series prior to the first value of
        y. Must have the same length as ``ar_coeff``. When omitted, the
        filter is started without initial conditions (``zi=None``).

    Returns
    -------
    y : array
        Filtered array, passed through ``PandasWrapper(x).wrap`` so that a
        pandas input yields a pandas output.

    Raises
    ------
    ValueError
        If ``init`` is given and ``len(init) != len(ar_coeff)``.

    Notes
    -----
    Computes the recursive filter ::

        y[n] = ar_coeff[0] * y[n-1] + ...
                + ar_coeff[n_coeff - 1] * y[n - n_coeff] + x[n]

    where n_coeff = len(ar_coeff).
    """
    pw = PandasWrapper(x)
    x = array_like(x, 'x')
    ar_coeff = array_like(ar_coeff, 'ar_coeff')
    # Denominator of the transfer function; the numerator is the constant 1.
    denominator = np.r_[1, -ar_coeff]

    if init is None:
        return pw.wrap(signal.lfilter([1.], denominator, x, zi=None))

    # integer init are treated differently in lfiltic
    init = array_like(init, 'init')
    if len(init) != len(ar_coeff):
        raise ValueError("ar_coeff must be the same length as init")

    initial_state = signal.lfiltic([1], denominator, init, x)
    # With zi supplied, lfilter returns (filtered, final_state); only the
    # filtered values are of interest here.
    filtered, _ = signal.lfilter([1.], denominator, x, zi=initial_state)
    return pw.wrap(filtered)
def test_wrap_pandas_append():
    """``wrap(..., append=...)`` suffixes the name(s) of the wrapped input."""
    # Named 1-d input: the result's name is "<name>_<append>".
    named = gen_data(1, True)
    named.name = "apple"
    raw = gen_data(1, False)
    result = PandasWrapper(named).wrap(raw, append="appended")
    assert result.name == "apple_appended"

    # Input with labelled columns: every label receives the suffix.
    labelled = gen_data(2, True)
    labelled.columns = [
        "apple_" + str(pos) for pos in range(labelled.shape[1])
    ]
    raw = gen_data(2, False)
    result = PandasWrapper(labelled).wrap(raw, append="appended")
    suffixed = [label + "_appended" for label in labelled.columns]
    assert list(result.columns) == suffixed
def test_wrap_pandas_append():
    '''Check the suffix that ``append`` adds to wrapped names and columns.'''
    # 1-d case: a single name is extended with the suffix.
    template = gen_data(1, True)
    template.name = 'apple'
    payload = gen_data(1, False)
    out = PandasWrapper(template).wrap(payload, append='appended')
    assert out.name == 'apple_appended'

    # 2-d case: each column label is extended with the suffix.
    template = gen_data(2, True)
    ncols = template.shape[1]
    template.columns = ['apple_' + str(k) for k in range(ncols)]
    payload = gen_data(2, False)
    out = PandasWrapper(template).wrap(payload, append='appended')
    want = [name + '_appended' for name in template.columns]
    assert list(out.columns) == want
def test_wrap_pandas_append_non_string():
    """Non-string names/labels are rendered as text before the suffix."""
    # GH 6826
    # Integer name on a 1-d input.
    named = gen_data(1, True)
    named.name = 7
    raw = gen_data(1, False)
    result = PandasWrapper(named).wrap(raw, append="appended")
    assert result.name == "7_appended"

    # Integer column labels on a 2-d input.
    labelled = gen_data(2, True)
    labelled.columns = list(range(labelled.shape[1]))
    raw = gen_data(2, False)
    result = PandasWrapper(labelled).wrap(raw, append="appended")
    suffixed = [f"{label}_appended" for label in labelled.columns]
    assert list(result.columns) == suffixed
def test_wrap_pandas(use_pandas):
    """
    Exercise ``PandasWrapper.wrap`` for 1-d and 2-d payloads.

    ``use_pandas`` selects whether the object handed to ``PandasWrapper`` is
    built by ``gen_data`` as a pandas object; the pandas-specific checks are
    skipped otherwise.
    """
    source = gen_data(1, use_pandas)
    raw = gen_data(1, False)

    # 1-d payload: Series for pandas input, plain ndarray otherwise.
    type_1d = pd.Series if use_pandas else np.ndarray
    out = PandasWrapper(source).wrap(raw)
    assert isinstance(out, type_1d)
    if use_pandas:
        assert out.name is None

    # ``columns`` may be a bare string or a one-element list.
    for cols in ("name", ["name"]):
        out = PandasWrapper(source).wrap(raw, columns=cols)
        assert isinstance(out, type_1d)
        if use_pandas:
            assert out.name == "name"

    # 2-d (single column) payload: DataFrame for pandas input.
    type_2d = pd.DataFrame if use_pandas else np.ndarray
    out = PandasWrapper(source).wrap(raw[:, None])
    assert isinstance(out, type_2d)
    if use_pandas:
        assert out.columns[0] == 0

    out = PandasWrapper(source).wrap(raw[:, None], columns=["name"])
    assert isinstance(out, type_2d)
    if use_pandas:
        assert out.columns == ["name"]

    # The error paths below are only checked for pandas input.
    if not use_pandas:
        return

    too_many_dims = "Can only wrap 1 or 2-d array_like"
    with pytest.raises(ValueError, match=too_many_dims):
        PandasWrapper(source).wrap(raw[:, None, None])

    wrong_length = "obj must have the same number of elements in axis 0 as"
    with pytest.raises(ValueError, match=wrong_length):
        PandasWrapper(source).wrap(raw[:raw.shape[0] // 2])
def seasonal_decompose(x, model="additive", filt=None, period=None,
                       two_sided=True, extrapolate_trend=0):
    """
    Seasonal decomposition using moving averages.

    Parameters
    ----------
    x : array_like
        Time series. If 2d, individual series are in columns. x must
        contain 2 complete cycles.
    model : {"additive", "multiplicative"}, optional
        Type of seasonal component. Abbreviations are accepted: any value
        starting with "m" selects the multiplicative model, anything else
        the additive one.
    filt : array_like, optional
        The filter coefficients for filtering out the seasonal component.
        The concrete moving average method used in filtering is determined
        by two_sided. When omitted, an equal-weight moving average of
        length ``period`` is used (with half weights at both ends when
        ``period`` is even).
    period : int, optional
        Period of the series. Must be used if x is not a pandas object or
        if the index of x does not have a frequency. Overrides default
        periodicity of x if x is a pandas object with a timeseries index.
    two_sided : bool, optional
        The moving average method used in filtering. If True (default), a
        centered moving average is computed using the filt. If False, the
        filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is linear
        least-squares extrapolated on both ends (or the single one if
        two_sided is False) considering this many (+1) closest points. If
        set to 'freq', ``period`` closest points are used. Setting this
        parameter results in no NaN values in trend or resid components.

    Returns
    -------
    DecomposeResult
        An object with seasonal, trend, resid and observed attributes.

    Raises
    ------
    ValueError
        If x contains non-finite values, if the multiplicative model is
        requested for data with zero or negative values, if no period is
        given and none can be inferred from the index of x, or if x holds
        fewer than two complete cycles.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.filters.convolution_filter
    statsmodels.tsa.seasonal.STL

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should be
    preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The seasonal component is first removed by applying a convolution
    filter to the data. The average of this smoothed series for each
    period is the returned seasonal component.
    """
    pw = PandasWrapper(x)
    # The index frequency has to be read before x becomes a plain ndarray.
    if period is None:
        pfreq = getattr(getattr(x, 'index', None), 'inferred_freq', None)
    else:
        pfreq = period

    x = array_like(x, 'x', maxdim=2)
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")
    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if period is None:
        if pfreq is None:
            raise ValueError("You must specify a period or x must be a "
                             "pandas object with a DatetimeIndex with "
                             "a freq not set to None")
        pfreq = freq_to_period(pfreq)
        period = pfreq

    if x.shape[0] < 2 * pfreq:
        raise ValueError('x must have 2 complete cycles requires {0} '
                         'observations. x only has {1} '
                         'observation(s)'.format(2 * pfreq, x.shape[0]))

    if filt is None:
        if period % 2:
            filt = np.repeat(1. / period, period)
        else:
            # split weights at ends
            filt = np.array([.5] + [1] * (period - 1) + [.5]) / period

    trend = convolution_filter(x, filt, int(two_sided) + 1)

    if extrapolate_trend == 'freq':
        extrapolate_trend = period - 1
    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    detrended = x / trend if multiplicative else x - trend

    # Normalise the per-period averages: mean one (multiplicative) or mean
    # zero (additive).
    period_averages = seasonal_mean(detrended, period)
    if multiplicative:
        period_averages /= np.mean(period_averages, axis=0)
    else:
        period_averages -= np.mean(period_averages, axis=0)

    # Repeat the seasonal pattern often enough to cover nobs, then cut.
    seasonal = np.tile(period_averages.T, nobs // period + 1).T[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    return DecomposeResult(
        seasonal=pw.wrap(seasonal.squeeze(), columns='seasonal'),
        trend=pw.wrap(trend.squeeze(), columns='trend'),
        resid=pw.wrap(resid.squeeze(), columns='resid'),
        observed=pw.wrap(x.squeeze(), columns=None))
def hpfilter(x, lamb=1600):
    """
    Hodrick-Prescott filter.

    Parameters
    ----------
    x : array_like
        The time series to filter, 1-d.
    lamb : float
        The Hodrick-Prescott smoothing parameter. A value of 1600 is
        suggested for quarterly data. Ravn and Uhlig suggest using a value
        of 6.25 (1600/4**4) for annual data and 129600 (1600*3**4) for
        monthly data.

    Returns
    -------
    cycle : ndarray
        The estimated cycle in the data given lamb.
    trend : ndarray
        The estimated trend in the data given lamb.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
        Baxter-King filter.
    statsmodels.tsa.filters.cf_filter.cffilter
        The Christiano Fitzgerald asymmetric, random walk filter.
    statsmodels.tsa.seasonal.seasonal_decompose
        Decompose a time series using moving averages.
    statsmodels.tsa.seasonal.STL
        Season-Trend decomposition using LOESS.

    Notes
    -----
    The HP filter removes a smooth trend, `T`, from the data `x`. by solving

    min sum((x[t] - T[t])**2 + lamb*((T[t+1] - T[t]) - (T[t] - T[t-1]))**2)
     T   t

    Here we implemented the HP filter as a ridge-regression rule using
    scipy.sparse. In this sense, the solution can be written as

    T = inv(I + lamb*K'K)x

    where I is a nobs x nobs identity matrix, and K is a (nobs-2) x nobs
    matrix such that

    K[i,j] = 1 if j == i or j == i + 2
    K[i,j] = -2 if j == i + 1
    K[i,j] = 0 otherwise

    References
    ----------
    Hodrick, R.J, and E. C. Prescott. 1980. "Postwar U.S. Business Cycles: An
        Empirical Investigation." `Carnegie Mellon University discussion
        paper no. 451`.
    Ravn, M.O and H. Uhlig. 2002. "Notes On Adjusted the Hodrick-Prescott
        Filter for the Frequency of Observations." `The Review of Economics
        and Statistics`, 84(2), 371-80.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> cycle, trend = sm.tsa.filters.hpfilter(dta.realgdp, 1600)
    >>> gdp_decomp = dta[['realgdp']].copy()
    >>> gdp_decomp["cycle"] = cycle
    >>> gdp_decomp["trend"] = trend

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> gdp_decomp[["realgdp", "trend"]].plot(ax=ax, fontsize=16)
    >>> plt.show()

    .. plot:: plots/hpf_plot.py
    """
    pw = PandasWrapper(x)
    x = array_like(x, 'x', ndim=1)
    nobs = len(x)

    identity = sparse.eye(nobs, nobs)
    # Second-difference operator: each row carries (1, -2, 1) on the main
    # diagonal and the two diagonals above it.
    stencil = np.repeat([[1.], [-2.], [1.]], nobs, axis=1)
    second_diff = sparse.dia_matrix((stencil, np.array([0, 1, 2])),
                                    shape=(nobs - 2, nobs))
    penalty = second_diff.T.dot(second_diff)

    trend = spsolve(identity + lamb * penalty, x, use_umfpack=True)
    cycle = x - trend
    return pw.wrap(cycle, append='cycle'), pw.wrap(trend, append='trend')
def bkfilter(x, low=6, high=32, K=12):
    """
    Baxter-King bandpass filter.

    Parameters
    ----------
    x : array_like
        A 1 or 2d ndarray. If 2d, variables are assumed to be in columns.
    low : float
        Minimum period for oscillations, ie., Baxter and King suggest that
        the Burns-Mitchell U.S. business cycle has 6 for quarterly data and
        1.5 for annual data.
    high : float
        Maximum period for oscillations BK suggest that the U.S.
        business cycle has 32 for quarterly data and 8 for annual data.
    K : int
        Lead-lag length of the filter. Baxter and King propose a truncation
        length of 12 for quarterly data and 3 for annual data.

    Returns
    -------
    c : array
        Cyclical component of x. The convolution is evaluated in 'valid'
        mode with a kernel of length 2K + 1, and the result is wrapped with
        ``trim_start=K`` and ``trim_end=K``.

    See Also
    --------
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.seasonal.seasonal_decompose

    Notes
    -----
    Returns a centered weighted moving average of the original series. Where
    the weights a[j] are computed ::

      a[j] = b[j] + theta, for j = 0, +/-1, +/-2, ... +/- K
      b[0] = (omega_2 - omega_1)/pi
      b[j] = 1/(pi*j)*(sin(omega_2*j) - sin(omega_1*j)), for j = +/-1, +/-2,...

    and theta is a normalizing constant ::

      theta = -sum(b)/(2K+1)

    References
    ----------
    Baxter, M. and R. G. King. "Measuring Business Cycles: Approximate
        Band-Pass Filters for Economic Time Series." *Review of Economics and
        Statistics*, 1999, 81(4), 575-593.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> cycles = sm.tsa.filters.bkfilter(dta[['realinv']], 6, 24, 12)

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> cycles.plot(ax=ax, style=['r--', 'b-'])
    >>> plt.show()

    .. plot:: plots/bkf_plot.py
    """
    # TODO: change the docstring to ..math::?
    # TODO: allow windowing functions to correct for the Gibbs phenomenon?
    # adjust bweights (symmetrically) by below before demeaning
    # Lanczos sigma factors np.sinc(2*j/(2.*K+1))
    pw = PandasWrapper(x)
    x = array_like(x, 'x', maxdim=2)

    # Band edges expressed as angular frequencies of the cut-off periods.
    omega_1 = 2. * np.pi / high
    omega_2 = 2. * np.pi / low

    lags = np.arange(1, int(K) + 1)
    side = 1 / (np.pi * lags) * (np.sin(omega_2 * lags)
                                 - np.sin(omega_1 * lags))

    kernel = np.zeros(2 * K + 1)
    kernel[K] = (omega_2 - omega_1) / np.pi  # weight at lag zero
    kernel[K + 1:] = side  # leads
    kernel[:K] = side[::-1]  # lags, mirrored so the kernel is symmetric
    kernel -= kernel.mean()  # make sure weights sum to zero

    if x.ndim == 2:
        # Column kernel so each variable is filtered independently.
        kernel = kernel[:, None]
    # get a centered moving avg/convolution
    filtered = fftconvolve(x, kernel, mode='valid')
    return pw.wrap(filtered, append='cycle', trim_start=K, trim_end=K)
def cffilter(x, low=6, high=32, drift=True):
    """
    Christiano Fitzgerald asymmetric, random walk filter.

    Parameters
    ----------
    x : array_like
        1 or 2d array to filter. If 2d, variables are assumed to be in
        columns.
    low : float
        Minimum period of oscillations. Features below low periodicity are
        filtered out. Default is 6 for quarterly data, giving a 1.5 year
        periodicity. Must be at least 2.
    high : float
        Maximum period of oscillations. Features above high periodicity are
        filtered out. Default is 32 for quarterly data, giving an 8 year
        periodicity.
    drift : bool
        Whether or not to remove a trend from the data. The trend is
        estimated as np.arange(nobs)*(x[-1] - x[0])/(len(x)-1).

    Returns
    -------
    cycle : array
        The features of `x` between the periodicities `low` and `high`.
    trend : array
        The series (after the drift adjustment, when ``drift`` is True)
        with the cycle subtracted.

    Raises
    ------
    ValueError
        If ``low`` is smaller than 2.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.seasonal.seasonal_decompose

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> cf_cycles, cf_trend = sm.tsa.filters.cffilter(dta[["infl", "unemp"]])

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> cf_cycles.plot(ax=ax, style=['r--', 'b-'])
    >>> plt.show()

    .. plot:: plots/cff_plot.py
    """
    # TODO: cythonize/vectorize loop?, add ability for symmetric filter,
    #       and estimates of theta other than random walk.
    if low < 2:
        raise ValueError("low must be >= 2")
    pw = PandasWrapper(x)
    x = array_like(x, 'x', ndim=2)
    nobs, nseries = x.shape
    a = 2 * np.pi / high
    b = 2 * np.pi / low

    if drift:
        # get drift adjusted series
        x = x - np.arange(nobs)[:, None] * (x[-1] - x[0]) / (nobs - 1)

    # Weights stored as a column vector: row 0 is the lag-zero weight,
    # rows 1..nobs the weights for increasing lead/lag.
    J = np.arange(1, nobs + 1)
    side = (np.sin(b * J) - np.sin(a * J)) / (np.pi * J)
    center = (b - a) / np.pi
    Bj = np.r_[center, side][:, None]

    y = np.zeros((nobs, nseries))
    for i in range(nobs):
        lead = Bj[1:-i - 2]  # weights applied to x[i + 1:-1]
        lag = Bj[1:i]  # weights applied to x[1:i], most recent first
        lead_total = np.sum(lead)
        # End-point weights for the last (B) and first (A) observation.
        B = -.5 * Bj[0] - lead_total
        A = -Bj[0] - lead_total - np.sum(lag) - B
        y[i] = (Bj[0] * x[i] + np.dot(lead.T, x[i + 1:-1]) + B * x[-1] +
                np.dot(lag.T, x[1:i][::-1]) + A * x[0])

    cycle = y.squeeze()
    trend = x.squeeze() - cycle
    return pw.wrap(cycle, append='cycle'), pw.wrap(trend, append='trend')
def convolution_filter(x, filt, nsides=2):
    """
    Linear filtering via convolution.

    Centered and backward displaced moving weighted average.

    Parameters
    ----------
    x : array_like
        data array, 1d or 2d, if 2d then observations in rows
    filt : array_like
        Linear filter coefficients in reverse time-order. Should have the
        same number of dimensions as x though if 1d and ``x`` is 2d will be
        coerced to 2d.
    nsides : int, optional
        If 2, a centered moving average is computed using the filter
        coefficients. If 1, the filter coefficients are for the current and
        past values only. Both methods use scipy.signal.convolve.

    Returns
    -------
    y : ndarray, 2d
        Filtered array, number of columns determined by x and filt. If a
        pandas object is given, a pandas object is returned. The result of
        the 'valid' convolution is padded through ``_pad_nans`` so that the
        index of the return matches the time period in ``x``.

    Raises
    ------
    ValueError
        If ``nsides`` is neither 1 nor 2.

    Notes
    -----
    In nsides == 1, x is filtered ::

        y[n] = filt[0]*x[n] + ... + filt[n_filt-1]*x[n-n_filt+1]

    where n_filt is len(filt). (This assumes ``_pad_nans`` prepends
    ``n_filt - 1`` missing values, which is what ``trim_head`` requests.)

    If nsides == 2, x is filtered around lag 0 ::

        y[n] = filt[0]*x[n - n_filt/2] + ... + filt[n_filt / 2] * x[n]
               + ... + x[n + n_filt/2]

    where n_filt is len(filt). If n_filt is even, then more of the filter
    is forward in time than backward.

    If filt is 1d or (nlags,1) one lag polynomial is applied to all
    variables (columns of x). If filt is 2d, (nlags, nvars) each series is
    independently filtered with its own lag polynomial, uses loop over nvar.
    This is different than the usual 2d vs 2d convolution. Both cases use
    the same alignment, so repeating one filter in every column of a 2d
    ``filt`` gives the same result as passing that filter once.

    Filtering is done with scipy.signal.convolve, so it will be reasonably
    fast for medium sized data. For large data fft convolution would be
    faster.
    """
    # for nsides shift the index instead of using 0 for 0 lag this
    # allows correct handling of NaNs
    if nsides == 1:
        trim_head = len(filt) - 1
        trim_tail = None
    elif nsides == 2:
        trim_head = int(np.ceil(len(filt)/2.) - 1) or None
        trim_tail = int(np.ceil(len(filt)/2.) - len(filt) % 2) or None
    else:  # pragma : no cover
        raise ValueError("nsides must be 1 or 2")

    pw = PandasWrapper(x)
    x = array_like(x, 'x', maxdim=2)
    filt = array_like(filt, 'filt', ndim=x.ndim)

    if filt.ndim == 1 or min(filt.shape) == 1:
        result = signal.convolve(x, filt, mode='valid')
    elif filt.ndim == 2:
        nlags = filt.shape[0]
        nvar = x.shape[1]
        result = np.zeros((x.shape[0] - nlags + 1, nvar))
        # One convolution per column, each with its own lag polynomial.
        # The one-sided case must NOT prepend a zero coefficient: the shift
        # is already expressed through trim_head, and a filter of length
        # nlags + 1 would return one value fewer than ``result`` has rows.
        for i in range(nvar):
            # could also use np.convolve, but easier for switching to fft
            result[:, i] = signal.convolve(x[:, i], filt[:, i],
                                           mode='valid')
    result = _pad_nans(result, trim_head, trim_tail)
    return pw.wrap(result)
def hprescott(X, side=2, smooth=1600, freq=''):
    """
    Hodrick-Prescott filter, two-sided or one-sided.

    The two-sided implementation leads to equivalent results as when using
    the statsmodels.tsa hpfilter function.

    Parameters
    ----------
    X : array-like
        The time series to filter (1-d), need to add multivariate
        functionality.
    side : int
        The implementation requested: 1 for the one-sided filter, 2 for the
        standard two-sided filter (default).
    smooth : float
        The Hodrick-Prescott smoothing parameter. A value of 1600 is
        suggested for quarterly data. Ravn and Uhlig suggest using a value
        of 6.25 (1600/4**4) for annual data and 129600 (1600*3**4) for
        monthly data. The function will default to using the quarterly
        parameter (1600).
    freq : str
        Optional parameter to specify the frequency of the data. Will
        override the smoothing parameter and implement using the suggested
        value from Ravn and Uhlig. Accepts annual (a), quarterly (q), or
        monthly (m) frequencies. Any other non-empty value prints a notice
        and leaves ``smooth`` untouched.

    Returns
    -------
    cycle : ndarray
        The estimated cycle in the data given side implementation and the
        smoothing parameter.
    trend : ndarray
        The estimated trend in the data given side implementation and the
        smoothing parameter.

    Raises
    ------
    ValueError
        If ``side`` is neither 1 nor 2.

    References
    ----------
    Hodrick, R.J, and E. C. Prescott. 1980. "Postwar U.S. Business Cycles: An
        Empirical Investigation." `Carnegie Mellon University discussion
        paper no. 451`.
    Meyer-Gohde, A. 2010. "Matlab code for one-sided HP-filters."
        `Quantitative Macroeconomics & Real Business Cycles,
        QM&RBC Codes 181`.
    Ravn, M.O and H. Uhlig. 2002. "Notes On Adjusted the Hodrick-Prescott
        Filter for the Frequency of Observations." `The Review of Economics
        and Statistics`, 84(2), 371-80.

    Examples
    --------
    from statsmodels.api import datasets, tsa
    dta = datasets.macrodata.load_pandas().data

    #Run original tsa.filters two-sided hp filter
    cycle_tsa, trend_tsa = tsa.filters.hpfilter(dta.realgdp, 1600)
    #Run two-sided implementation
    cycle2, trend2 = hprescott(dta.realgdp, 2, 1600)
    #Run one-sided implementation
    cycle1, trend1 = hprescott(dta.realgdp, 1, 1600)
    """
    # Determine smooth if a specific frequency is given
    presets = (('q', 1600),     # quarterly
               ('a', 6.25),     # annually
               ('m', 129600))   # monthly
    for code, value in presets:
        if freq == code:
            smooth = value
            break
    else:
        if freq != '':
            print('Invalid frequency parameter inputted. Defaulting to '
                  'defined smooth parameter value or 1600 if no value was '
                  'provided.')

    pw = PandasWrapper(X)
    X = array_like(X, 'X', ndim=1)
    T = len(X)

    # Preallocate trend array
    trend = np.zeros(T)

    # Rearrange the first order conditions of minimization problem to yield
    # matrix. First and last two rows are mirrored; middle rows follow the
    # same pattern shifting position by 1 each row.
    a1 = np.array([1 + smooth, -2 * smooth, smooth])
    a2 = np.array([-2 * smooth, 1 + 5 * smooth, -4 * smooth, smooth])
    a3 = np.array([smooth, -4 * smooth, 1 + 6 * smooth, -4 * smooth, smooth])

    Abeg = np.vstack([np.append(a1, [0]), a2])
    Aend = np.vstack([a2[::-1], np.append([0], a1[::-1])])

    Atot = np.zeros((T, T))
    Atot[:2, :4] = Abeg
    Atot[-2:, -4:] = Aend
    for row in range(2, T - 2):
        Atot[row, row - 2:row + 3] = a3

    if side == 1:
        # The first two trend values are the observations themselves.
        trend[:2] = X[:2]

        # Third observation minimization problem is as follows
        r3 = np.array([-2 * smooth, 1 + 4 * smooth, -2 * smooth])
        Atmp = np.vstack([a1, r3, a1[::-1]])
        # Solve the system A*Z = X and keep only the newest trend value.
        trend[2] = cho_solve(cho_factor(Atmp), X[:3])[2]

        # Pattern begins with fourth observation: the base A matrix is
        # just the unique first and last two rows.
        Atmp = np.vstack([Abeg, Aend])
        trend[3] = cho_solve(cho_factor(Atmp), X[:4])[3]

        # Build recursively larger systems through time, one per period.
        for t in range(4, T):
            Atmp = np.concatenate((Atot[:t - 1, :t + 1],
                                   np.zeros((2, t + 1))))
            Atmp[t - 1:t + 1, t - 3:t + 1] = Aend
            trend[t] = cho_solve(cho_factor(Atmp), X[:t + 1])[t]
    elif side == 2:
        trend = cho_solve(cho_factor(Atot), X)
    else:
        raise ValueError('Side Parameter should be 1 or 2')

    cyclical = X - trend
    return pw.wrap(cyclical, append='cyclical'), pw.wrap(trend, append='trend')