def test_odd_length_filter(self):
    """Check odd-length (5-term) filters against the stored expected values.

    Runs the two-sided convolution, the one-sided convolution and the
    recursive filter on the first data column and verifies both the
    filtered values and that the result index still spans the same
    first/last dates.
    """
    first_date = datetime(1951, 3, 31)
    last_date = datetime(1958, 12, 31)
    coefs = (.75, .5, .3, .2, .1)
    series = self.data[0]

    def check_span(result):
        # The filtered result must keep the original index end points.
        np.testing.assert_(result.index[0] == first_date)
        np.testing.assert_(result.index[-1] == last_date)

    # Default (two-sided) convolution.
    two_sided = convolution_filter(series, list(coefs))
    np.testing.assert_almost_equal(two_sided.values.squeeze(),
                                   self.expected.conv2_odd)
    check_span(two_sided)

    # One-sided convolution.
    one_sided = convolution_filter(series, list(coefs), nsides=1)
    np.testing.assert_almost_equal(one_sided.values.squeeze(),
                                   self.expected.conv1_odd)
    check_span(one_sided)

    # with no NAs
    # not a stable filter
    recursed = recursive_filter(series, list(coefs),
                                init=[150, 100, 125, 135, 145])
    # only have 12 characters in R and this blows up and gets big,
    # hence the comparison is limited to 4 decimals
    np.testing.assert_almost_equal(recursed.values.squeeze(),
                                   self.expected.recurse_odd, 4)
    check_span(recursed)
def test_pandas2d(self):
    """Check that a 2-column convolution keeps the index end points."""
    first_date = datetime(1951, 3, 31)
    last_date = datetime(1958, 12, 31)

    # Stack the first data column next to itself to get a 2d input.
    columns = (self.data[0], self.data[0])
    frame = concat(columns, axis=1)

    filtered = convolution_filter(frame, [[.75, .75], [.25, .25]])

    assert_(filtered.index[0] == first_date)
    assert_(filtered.index[-1] == last_date)
def test_convolution(self):
    """Compare 1d convolution results with the stored expected arrays.

    Each fixture (``self.data`` and ``self.datana``) is filtered once with
    the default number of sides and once with ``nsides=1``.
    """
    # (fixture attribute, expected default result, expected nsides=1 result)
    cases = (
        ("data", "conv2", "conv1"),
        ("datana", "conv2_na", "conv1_na"),
    )
    for fixture, default_ref, one_sided_ref in cases:
        values = getattr(self, fixture).values.squeeze()

        filtered = convolution_filter(values, [.75, .25])
        np.testing.assert_almost_equal(
            filtered, getattr(self.expected, default_ref))

        filtered = convolution_filter(values, [.75, .25], nsides=1)
        np.testing.assert_almost_equal(
            filtered, getattr(self.expected, one_sided_ref))
def _moving_average(table, input_cols, weights_array=None, window_size=1,
                    weights='uniform_weights', mode='past_values_only'):
    """Append a moving-average column for each requested input column.

    Parameters
    ----------
    table : object with ``copy()`` and column item access
        Input table; it is copied, the copy receives the new columns.
    input_cols : iterable of str
        Names of the columns to smooth. For each ``col`` a column
        ``col + '_MA'`` is written to the output table.
    weights_array : array-like, optional
        Filter weights. Replaced by ``np.ones(window_size)`` when
        ``weights == 'uniform_weights'``.
    window_size : int
        Length of the uniform window.
    weights : str
        ``'uniform_weights'`` selects the uniform window; any other value
        uses ``weights_array`` as given.
    mode : str
        ``'centered_moving_average'`` passes ``nsides=2`` to the
        convolution filter; any other value passes ``nsides=1``.

    Returns
    -------
    dict
        ``{'out_table': <copied table with the added columns>}``.
    """
    out_table = table.copy()
    nsides = 2 if mode == 'centered_moving_average' else 1

    if weights == 'uniform_weights':
        weights_array = np.ones(window_size)

    for column in input_cols:
        filtered = sm.convolution_filter(out_table[column], weights_array,
                                         nsides)
        # Normalise by the weight total so the result is an average.
        out_table[column + '_MA'] = filtered / sum(weights_array)

    return {'out_table': out_table}
def test_convolution2d(self):
    """Compare 2d convolution results with the stored expected arrays.

    The expected arrays are stored 1d, so they are reshaped to a column
    (``[:, None]``) or stacked with ``np.c_`` before comparison.
    """
    values = self.data.values

    filtered = convolution_filter(values, [[.75], [.25]])
    reference = self.expected.conv2
    np.testing.assert_almost_equal(filtered, reference[:, None])

    # Same data duplicated into two columns, same filter per column.
    doubled = convolution_filter(np.c_[values, values],
                                 [[.75, .75], [.25, .25]])
    np.testing.assert_almost_equal(doubled, np.c_[reference, reference])

    filtered = convolution_filter(values, [[.75], [.25]], nsides=1)
    reference = self.expected.conv1
    np.testing.assert_almost_equal(filtered, reference[:, None])

    # Repeat the single-column checks on the ``datana`` fixture.
    values = self.datana.values

    filtered = convolution_filter(values, [[.75], [.25]])
    reference = self.expected.conv2_na
    np.testing.assert_almost_equal(filtered, reference[:, None])

    filtered = convolution_filter(values, [[.75], [.25]], nsides=1)
    reference = self.expected.conv1_na
    np.testing.assert_almost_equal(filtered, reference[:, None])
def test_pandas(self):
    """Check that filtering pandas input keeps the index end points.

    Covers the convolution filter (default and ``nsides=1``) and the
    recursive filter on the first data column, plus the recursive filter
    on the ``datana`` fixture.
    """
    first_date = datetime(1951, 3, 31)
    last_date = datetime(1958, 12, 31)

    def check_span(result):
        assert_(result.index[0] == first_date)
        # with no nan-padding q1 if not
        assert_(result.index[-1] == last_date)

    series = self.data[0]
    check_span(convolution_filter(series, [.75, .25]))
    check_span(convolution_filter(series, [.75, .25], nsides=1))
    check_span(recursive_filter(series, [.75, .25]))

    with_na = self.datana
    check_span(recursive_filter(with_na, [.75, .25]))
def seasonal_decompose(x, model="additive", filt=None, freq=None):
    """
    Split a series into seasonal, trend and residual components.

    Parameters
    ----------
    x : array-like
        Time series.
    model : str {"additive", "multiplicative"}
        Type of seasonal component. Only the first letter is inspected,
        so abbreviations starting with "m" select the multiplicative
        model and anything else the additive one.
    filt : array-like
        The filter coefficients used in the convolution. The default is a
        symmetric moving average of length ``freq`` (with half weights at
        both ends when ``freq`` is even).
    freq : int, optional
        Frequency of the series. Must be used if x is not a pandas object
        with a timeseries index.

    Returns
    -------
    results : obj
        A object with seasonal, trend, resid and observed attributes.

    Raises
    ------
    ValueError
        If x contains non-finite values, if the multiplicative model is
        requested for data with zero or negative values, if ``freq``
        disagrees with the frequency inferred from the index, or if no
        frequency is available at all.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    A convolution filter is applied to the data; its output is returned
    as the trend and is removed from the data. The per-period averages
    of the remainder, centred (additive) or rescaled (multiplicative) by
    their mean, are returned as the seasonal component.

    See Also
    --------
    statsmodels.tsa.filters.convolution_filter
    """
    _pandas_wrapper, pfreq = _maybe_get_pandas_wrapper_freq(x)
    x = np.asanyarray(x).squeeze()
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")

    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    # Settle on one frequency: the index-derived one wins unless an
    # explicit, conflicting ``freq`` was passed.
    if pfreq is None:
        if freq is None:
            raise ValueError("You must specify a freq or x must be a "
                             "pandas object with a timeseries index")
    else:
        pfreq = ym_freq_to_period.freq_to_period(pfreq)
        if freq and pfreq != freq:
            raise ValueError("Inferred frequency of index and frequency "
                             "don't match. This function does not re-sample")
        freq = pfreq

    if filt is None:
        if freq % 2 != 0:
            filt = np.repeat(1. / freq, freq)
        else:
            # even window: split weights at ends
            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq

    # nan pad for conformability - convolve doesn't do it
    trend = convolution_filter(x, filt)

    detrended = x / trend if multiplicative else x - trend

    period_averages = seasonal_mean(detrended, freq)
    if multiplicative:
        period_averages /= np.mean(period_averages)
    else:
        period_averages -= np.mean(period_averages)

    # Repeat the per-period pattern and cut it to the series length.
    repeats = nobs // freq + 1
    seasonal = np.tile(period_averages, repeats)[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    seasonal_out, trend_out, resid_out, observed_out = lmap(
        _pandas_wrapper, [seasonal, trend, resid, x])
    return DecomposeResult(seasonal=seasonal_out, trend=trend_out,
                           resid=resid_out, observed=observed_out)
def seasonal_decompose(
    x,
    model="additive",
    filt=None,
    period=None,
    two_sided=True,
    extrapolate_trend=0,
):
    """
    Seasonal decomposition using moving averages.

    Parameters
    ----------
    x : array_like
        Time series. If 2d, individual series are in columns. x must
        contain 2 complete cycles.
    model : {"additive", "multiplicative"}, optional
        Type of seasonal component. Only the first letter is inspected,
        so abbreviations are accepted.
    filt : array_like, optional
        The filter coefficients used to estimate the trend. Whether they
        are applied as a centered or a past-values-only moving average is
        determined by two_sided. Defaults to a moving average of length
        ``period`` (half weights at both ends when ``period`` is even).
    period : int, optional
        Period of the series. Must be used if x is not a pandas object or
        if the index of x does not have a frequency. When given, it is
        used instead of the periodicity inferred from the index of x.
    two_sided : bool, optional
        If True (default), a centered moving average is computed using
        the filt. If False, the filter coefficients are for past values
        only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is
        linear least-squares extrapolated on both ends (or the single one
        if two_sided is False) considering this many (+1) closest points.
        If set to 'freq', ``period`` closest points are used. Setting
        this parameter results in no NaN values in trend or resid
        components.

    Returns
    -------
    DecomposeResult
        A object with seasonal, trend, resid and observed attributes.

    Raises
    ------
    ValueError
        If x contains non-finite values, if the multiplicative model is
        requested for data with zero or negative values, if no period is
        given and none can be inferred, or if x has fewer than two
        complete cycles.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
        Baxter-King filter.
    statsmodels.tsa.filters.cf_filter.cffilter
        Christiano-Fitzgerald asymmetric, random walk filter.
    statsmodels.tsa.filters.hp_filter.hpfilter
        Hodrick-Prescott filter.
    statsmodels.tsa.filters.convolution_filter
        Linear filtering via convolution.
    statsmodels.tsa.seasonal.STL
        Season-Trend decomposition using LOESS.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The trend is estimated first by applying a convolution filter to the
    data. It is then removed from the series, and the average of the
    de-trended series for each period is the returned seasonal component.
    """
    pw = PandasWrapper(x)
    pfreq = period
    if period is None:
        # Fall back to whatever frequency the index (if any) reports.
        index = getattr(x, "index", None)
        pfreq = getattr(index, "inferred_freq", None)

    x = array_like(x, "x", maxdim=2)
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")

    multiplicative = model.startswith("m")
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if period is None:
        if pfreq is None:
            raise ValueError(
                "You must specify a period or x must be a pandas object with "
                "a PeriodIndex or a DatetimeIndex with a freq not set to None")
        pfreq = freq_to_period(pfreq)
        period = pfreq

    if x.shape[0] < 2 * pfreq:
        raise ValueError(
            f"x must have 2 complete cycles requires {2 * pfreq} "
            f"observations. x only has {x.shape[0]} observation(s)")

    if filt is None:
        if period % 2 != 0:
            filt = np.repeat(1.0 / period, period)
        else:
            # even window: split weights at ends
            filt = np.array([0.5] + [1] * (period - 1) + [0.5]) / period

    nsides = int(two_sided) + 1
    trend = convolution_filter(x, filt, nsides)

    if extrapolate_trend == "freq":
        extrapolate_trend = period - 1
    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    detrended = x / trend if multiplicative else x - trend

    period_averages = seasonal_mean(detrended, period)
    if multiplicative:
        period_averages /= np.mean(period_averages, axis=0)
    else:
        period_averages -= np.mean(period_averages, axis=0)

    # Repeat the per-period pattern along the time axis, then trim.
    repeats = nobs // period + 1
    seasonal = np.tile(period_averages.T, repeats).T[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    components = (seasonal, trend, resid, x)
    labels = ("seasonal", "trend", "resid", None)
    wrapped = [
        pw.wrap(component.squeeze(), columns=label)
        for component, label in zip(components, labels)
    ]
    return DecomposeResult(
        seasonal=wrapped[0],
        trend=wrapped[1],
        resid=wrapped[2],
        observed=wrapped[3],
    )
def seasonal_decompose(x, model="additive", filt=None, freq=None,
                       two_sided=True, extrapolate_trend=0):
    """
    Seasonal decomposition using moving averages

    Parameters
    ----------
    x : array-like
        Time series. If 2d, individual series are in columns.
    model : str {"additive", "multiplicative"}
        Type of seasonal component. Only the first letter is inspected,
        so abbreviations are accepted.
    filt : array-like
        The filter coefficients used to estimate the trend. The concrete
        moving average method used in filtering is determined by
        two_sided. Defaults to a moving average of length ``freq`` (half
        weights at both ends when ``freq`` is even).
    freq : int, optional
        Frequency of the series. Must be used if x is not a pandas object.
        Overrides default periodicity of x if x is a pandas object with a
        timeseries index.
    two_sided : bool
        The moving average method used in filtering.
        If True (default), a centered moving average is computed using
        the filt. If False, the filter coefficients are for past values
        only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is
        linear least-squares extrapolated on both ends (or the single one
        if two_sided is False) considering this many (+1) closest points.
        If set to 'freq', use `freq` closest points. Setting this
        parameter results in no NaN values in trend or resid components.

    Returns
    -------
    results : obj
        A object with seasonal, trend, resid and observed attributes.

    Raises
    ------
    ValueError
        If x contains non-finite values, if the multiplicative model is
        requested for data with zero or negative values, or if ``freq``
        is not given and no frequency can be taken from x.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The trend is estimated first by applying a convolution filter to the
    data. It is then removed from the series, and the average of the
    de-trended series for each period is the returned seasonal component.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.filters.convolution_filter
    """
    if freq is None:
        _pandas_wrapper, pfreq = _maybe_get_pandas_wrapper_freq(x)
    else:
        # An explicit freq overrides anything the index might report.
        _pandas_wrapper = _maybe_get_pandas_wrapper(x)
        pfreq = None
    x = np.asanyarray(x).squeeze()
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")

    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if freq is None:
        if pfreq is None:
            # Message fixed: the original concatenation rendered
            # "... index witha freq ..." (missing space).
            raise ValueError("You must specify a freq or x must be a "
                             "pandas object with a timeseries index with "
                             "a freq not set to None")
        pfreq = freq_to_period(pfreq)
        freq = pfreq

    if filt is None:
        if freq % 2 == 0:  # split weights at ends
            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq
        else:
            filt = np.repeat(1. / freq, freq)

    nsides = int(two_sided) + 1
    trend = convolution_filter(x, filt, nsides)

    if extrapolate_trend == 'freq':
        extrapolate_trend = freq - 1

    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    if multiplicative:
        detrended = x / trend
    else:
        detrended = x - trend

    period_averages = seasonal_mean(detrended, freq)

    if multiplicative:
        period_averages /= np.mean(period_averages, axis=0)
    else:
        period_averages -= np.mean(period_averages, axis=0)

    # Repeat the per-period pattern along the time axis, then trim to nobs.
    seasonal = np.tile(period_averages.T, nobs // freq + 1).T[:nobs]

    if multiplicative:
        resid = x / seasonal / trend
    else:
        resid = detrended - seasonal

    results = lmap(_pandas_wrapper, [seasonal, trend, resid, x])
    return DecomposeResult(seasonal=results[0], trend=results[1],
                           resid=results[2], observed=results[3])