def print_decomposition_series(decomp: sm.DecomposeResult, dataframe: pd.DataFrame, series_name: str) -> None:
    """Plot decomposition series provided by StatsModel DecomposeResult.

    Sets the global default figure size to 18x8 inches, draws the
    decomposition via ``decomp.plot()`` and shows it.

    Args:
        decomp: Decomposition result whose ``plot()`` method is called.
        dataframe: Currently unused; kept so existing callers keep working.
        series_name: Currently unused; kept so existing callers keep working.
    """
    # NOTE: this changes the process-wide matplotlib default and therefore
    # also affects figures created after this call.
    pylab.rcParams["figure.figsize"] = (18, 8)

    # statsmodels' DecomposeResult.plot() creates its own figure, so no
    # figure is opened up front: a separately created plt.figure() would
    # stay empty and be displayed as a blank canvas by plt.show().
    decomp.plot()
    plt.show()
def decompose(df, period=365, lo_frac=0.6, lo_delta=0.01):
    """Create a seasonal-trend (with Loess, aka "STL") decomposition of observed time series data.

    This implementation is modeled after the ``statsmodels.tsa.seasonal_decompose`` method
    but substitutes a Lowess regression for a convolution in its trend estimation.

    This is an additive model, Y[t] = T[t] + S[t] + e[t]

    For more details on lo_frac and lo_delta, see:
    `statsmodels.nonparametric.smoothers_lowess.lowess()`

    Args:
        df (pandas.Dataframe): Time series of observed counts. This DataFrame must be continuous
            (no gaps or missing data), and include a ``pandas.DatetimeIndex``.
        period (int, optional): Most significant periodicity in the observed time series, in
            units of 1 observation. Ex: to accommodate strong annual periodicity within years of
            daily observations, ``period=365``. Values larger than the number of observations
            are clipped to the series length.
        lo_frac (float, optional): Fraction of data to use in fitting Lowess regression.
        lo_delta (float, optional): Fractional distance within which to use linear-interpolation
            instead of weighted regression. Using non-zero ``lo_delta`` significantly decreases
            computation time.

    Returns:
        `statsmodels.tsa.seasonal.DecomposeResult`: An object with DataFrame attributes for the
            seasonal, trend, and residual components, as well as the average seasonal cycle.

    Raises:
        ValueError: If the series contains no observations or ``period`` is smaller than 1.
    """
    # use some existing pieces of statsmodels
    lowess = sm.nonparametric.lowess
    _pandas_wrapper, _ = _maybe_get_pandas_wrapper_freq(df)

    # get plain np array
    observed = np.asanyarray(df).squeeze()
    nobs = len(observed)

    # fail early with a clear message instead of a ZeroDivisionError /
    # np.tile error further down
    if nobs == 0:
        raise ValueError("Cannot decompose an empty time series")
    if period < 1:
        raise ValueError("period must be a positive integer")

    # calc trend, remove from observation; delta is given to lowess in
    # units of observations, hence the scaling by the series length
    trend = lowess(observed, np.arange(nobs),
                   frac=lo_frac,
                   delta=lo_delta * nobs,
                   return_sorted=False)
    detrended = observed - trend

    # period must not be larger than size of series to avoid introducing NaNs
    period = min(period, nobs)

    # calc one-period seasonality: average every period-th detrended value
    period_averages = np.array([pd_nanmean(detrended[i::period]) for i in range(period)])
    # 0-center the period avgs
    period_averages -= np.mean(period_averages)
    # repeat the one-period cycle often enough to cover the series, then trim
    seasonal = np.tile(period_averages, nobs // period + 1)[:nobs]
    resid = detrended - seasonal

    # convert the arrays back to appropriate dataframes, stuff them back into
    # the statsmodel object
    seasonal, trend, resid, observed = map(_pandas_wrapper, [seasonal, trend, resid, observed])
    return DecomposeResult(seasonal=seasonal,
                           trend=trend,
                           resid=resid,
                           observed=observed,
                           period_averages=period_averages)
def fit(self):
    """
    Estimate a trend component, multiple seasonal components, and a
    residual component.

    The seasonal components are extracted one period at a time with STL
    and, when there is more than one period, refined over ``self.iterate``
    passes. The trend and robustness weights come from the last STL fit.

    Returns
    -------
    DecomposeResult
        Estimation results. The components are pandas objects when
        ``self.endog`` is a pandas Series or DataFrame, otherwise arrays.

    Notes
    -----
    When ``self.lmbda`` is "auto" the estimated Box-Cox lambda is stored
    in ``self.est_lmbda``. ``self._stl_kwargs`` is left untouched, so
    repeated calls use the same STL settings.
    """
    num_seasons = len(self.periods)
    # With a single seasonal component there is nothing to refine.
    iterate = 1 if num_seasons == 1 else self.iterate

    # Box Cox
    if self.lmbda == "auto":
        y, lmbda = boxcox(self._y, lmbda=None)
        self.est_lmbda = lmbda
    elif self.lmbda is not None:
        # Compare against None rather than truthiness: 0 is a valid
        # Box-Cox lambda (log transform) and must not be skipped.
        y = boxcox(self._y, lmbda=self.lmbda)
    else:
        y = self._y

    # Get STL fit params. Work on a copy so that popping the fit-only
    # options does not strip them from the instance for later fit() calls.
    stl_kwargs = dict(self._stl_kwargs)
    stl_inner_iter = stl_kwargs.pop("inner_iter", None)
    stl_outer_iter = stl_kwargs.pop("outer_iter", None)

    # Iterate over each seasonal component to extract seasonalities
    seasonal = np.zeros(shape=(num_seasons, self.nobs))
    deseas = y
    for _ in range(iterate):
        for i in range(num_seasons):
            # Add the previous estimate of this component back in before
            # re-estimating it (all zeros on the first pass).
            deseas = deseas + seasonal[i]
            res = STL(
                endog=deseas,
                period=self.periods[i],
                seasonal=self.windows[i],
                **stl_kwargs,
            ).fit(inner_iter=stl_inner_iter, outer_iter=stl_outer_iter)
            seasonal[i] = res.seasonal
            deseas = deseas - seasonal[i]

    # One column per seasonal component; collapses to 1-D for a single one.
    seasonal = np.squeeze(seasonal.T)
    trend = res.trend
    rw = res.weights
    resid = deseas - trend

    # Return pandas if endog is pandas
    if isinstance(self.endog, (pd.Series, pd.DataFrame)):
        index = self.endog.index
        y = pd.Series(y, index=index, name="observed")
        trend = pd.Series(trend, index=index, name="trend")
        resid = pd.Series(resid, index=index, name="resid")
        rw = pd.Series(rw, index=index, name="robust_weight")
        cols = [f"seasonal_{period}" for period in self.periods]
        if seasonal.ndim == 1:
            seasonal = pd.Series(seasonal, index=index, name="seasonal")
        else:
            seasonal = pd.DataFrame(seasonal, index=index, columns=cols)

    # Avoid circular imports
    from statsmodels.tsa.seasonal import DecomposeResult

    return DecomposeResult(y, seasonal, trend, resid, rw)
def seasonal_decompose(x, model="additive", filt=None, freq=None, two_sided=True, extrapolate_trend=0):
    """
    Seasonal decomposition using moving averages

    Parameters
    ----------
    x : array-like
        Time series. If 2d, individual series are in columns.
    model : str {"additive", "multiplicative"}
        Type of seasonal component. Abbreviations are accepted: any value
        starting with "m" selects the multiplicative model, everything
        else is treated as additive.
    filt : array-like
        The filter coefficients used to estimate the trend. When omitted,
        a moving average over one period is used (with half weights at
        both ends if `freq` is even). The concrete moving average method
        used in filtering is determined by two_sided.
    freq : int, optional
        Frequency of the series. Must be used if x is not a pandas object.
        Overrides default periodicity of x if x is a pandas object with a
        timeseries index.
    two_sided : bool
        The moving average method used in filtering. If True (default), a
        centered moving average is computed using the filt. If False, the
        filter coefficients are for past values only.
    extrapolate_trend : int or 'freq', optional
        If set to > 0, the trend resulting from the convolution is linear
        least-squares extrapolated on both ends (or the single one if
        two_sided is False) considering this many (+1) closest points.
        If set to 'freq', use `freq` closest points. Setting this
        parameter results in no NaN values in trend or resid components.

    Returns
    -------
    results : obj
        An object with seasonal, trend, and resid attributes.

    Raises
    ------
    ValueError
        If x contains non-finite values, if the multiplicative model is
        requested for data with zero or negative values, or if no freq is
        given and none can be taken from x.

    Notes
    -----
    This is a naive decomposition. More sophisticated methods should
    be preferred.

    The additive model is Y[t] = T[t] + S[t] + e[t]

    The multiplicative model is Y[t] = T[t] * S[t] * e[t]

    The trend is estimated by applying a convolution filter to the data
    and is then removed from the series. The average of the detrended
    series for each position in the period, centered (additive) or
    rescaled (multiplicative) by its mean, is the returned seasonal
    component.

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.filters.hp_filter.hpfilter
    statsmodels.tsa.filters.convolution_filter
    """
    # Pick the wrapper that converts results back to pandas; only look up
    # the index frequency when the caller did not supply one.
    if freq is not None:
        wrap = _maybe_get_pandas_wrapper(x)
        index_freq = None
    else:
        wrap, index_freq = _maybe_get_pandas_wrapper_freq(x)

    x = np.asanyarray(x).squeeze()
    nobs = len(x)

    if not np.all(np.isfinite(x)):
        raise ValueError("This function does not handle missing values")

    multiplicative = model.startswith('m')
    if multiplicative and np.any(x <= 0):
        raise ValueError("Multiplicative seasonality is not appropriate "
                         "for zero and negative values")

    if freq is None:
        if index_freq is None:
            raise ValueError("You must specify a freq or x must be a "
                             "pandas object with a timeseries index with "
                             "a freq not set to None")
        freq = freq_to_period(index_freq)

    if filt is None:
        if freq % 2:
            # odd period: plain equal-weight average
            filt = np.repeat(1. / freq, freq)
        else:
            # even period: split weights at ends
            filt = np.array([.5] + [1] * (freq - 1) + [.5]) / freq

    # 2 -> centered filter, 1 -> past values only
    nsides = int(two_sided) + 1
    trend = convolution_filter(x, filt, nsides)

    if extrapolate_trend == 'freq':
        extrapolate_trend = freq - 1

    if extrapolate_trend > 0:
        trend = _extrapolate_trend(trend, extrapolate_trend + 1)

    detrended = x / trend if multiplicative else x - trend

    # Average seasonal pattern over one period, normalised in place.
    period_averages = seasonal_mean(detrended, freq)
    period_mean = np.mean(period_averages, axis=0)
    if multiplicative:
        period_averages /= period_mean
    else:
        period_averages -= period_mean

    # Repeat the one-period pattern to cover the series, then trim.
    repeats = nobs // freq + 1
    seasonal = np.tile(period_averages.T, repeats).T[:nobs]

    resid = x / seasonal / trend if multiplicative else detrended - seasonal

    seasonal, trend, resid, observed = lmap(wrap, [seasonal, trend, resid, x])
    return DecomposeResult(seasonal=seasonal,
                           trend=trend,
                           resid=resid,
                           observed=observed,
                           freq=freq)
def print_decomposition(decomp: sm.DecomposeResult) -> None:
    """Plot decomposition provided by StatsModel DecomposeResult.

    Sets the global default figure size to 18x8 inches, then draws the
    decomposition through ``decomp.plot()`` and shows it.
    """
    default_size = (18, 8)
    # Changes the process-wide default, so later figures are affected too.
    pylab.rcParams['figure.figsize'] = default_size

    decomp.plot()
    plt.show()