def smad(ts, m=3.0, period=None, stl_seasonal=25, only_low_values=False, score=False):
    '''
    Seasonal-MAD anomaly detection.

    The series median and the STL seasonal component are subtracted from
    ``ts``; a MAD detector is then fitted on what remains.

    Input:
        ts: pd.Series with a DatetimeIndex
        m: value forwarded as ``m`` to ``MAD.predict`` (described upstream
            as a standard-deviation threshold)
        period: seasonal period of the series; when None, STL is built
            without an explicit period
        stl_seasonal: STL ``seasonal`` parameter
        only_low_values: forwarded to the ``MAD`` constructor
        score: if True, return the detector's decision function
    Output:
        ``decision_function(residual)`` when ``score`` is true, otherwise
        the entries of ``ts`` located at the index returned by ``predict``.
    '''
    # Only hand ``period`` to STL when the caller supplied one.
    stl_options = {"seasonal": stl_seasonal}
    if period is not None:
        stl_options["period"] = period
    decomposition = STL(ts, **stl_options).fit()

    # Residual = series minus its median minus the seasonal component.
    residual = ts - np.nanmedian(ts) - decomposition.seasonal

    # Outlier search with MAD.
    detector = MAD(only_low_values=only_low_values)
    detector.fit(residual)

    if score:
        return detector.decision_function(residual)
    flagged_index = detector.predict(residual, m=m).index
    return ts.loc[flagged_index]
def predict_past(self, df, freq_period, steps):
    """
    Scale ``df["y"]``, STL-decompose it and return a prediction.

    Parameters
    ----------
    df : DataFrame with a ``"y"`` column. The columns ``trend``,
        ``seasonal`` and ``residual`` are written onto it.
    freq_period : int used to derive the STL period (see note below).
    steps : forwarded to ``self.make_prediction``.

    Returns
    -------
    Element 0 of the first value returned by ``self.make_prediction(steps)``.

    Side effects
    ------------
    Sets ``self.scaler2``, ``self.trend``, ``self.seasonal`` and
    ``self.residual``; writes the fitted scaler to
    ``<self.directory>/scaler_pred.sav``; shows the decomposition plot.
    """
    scalerfile = self.directory + '/scaler_pred.sav'
    values = np.reshape(np.array(df["y"]), (-1, 1))

    # The scaler is refitted and re-saved on every call: an existing scaler
    # file is overwritten, never loaded.
    if (df["y"].max() - df["y"].min()) > 100:
        if self.verbose == 1:
            print("PowerTransformation scaler used")
        scaler = PowerTransformer()
    else:
        if self.verbose == 1:
            print("Identity scaler used")
        scaler = IdentityTransformer()
    self.scaler2 = scaler.fit(values)
    Y = self.scaler2.transform(values)
    # Context manager so the file handle is closed even if dumping fails.
    with open(scalerfile, 'wb') as handle:
        pickle.dump(self.scaler2, handle)

    if freq_period % 2 == 0:
        freq_period = freq_period + 1
    # NOTE(review): the period passed is freq_period + 1 *after* forcing
    # freq_period to be odd, i.e. always even -- confirm this is intended.
    decomposition = STL(Y, period=freq_period + 1).fit()
    decomposition.plot()
    plt.show()

    df.loc[:, 'trend'] = decomposition.trend
    df.loc[:, 'seasonal'] = decomposition.seasonal
    df.loc[:, 'residual'] = decomposition.resid
    df = df.bfill()

    self.trend = np.asarray(df.loc[:, 'trend'])
    self.seasonal = np.asarray(df.loc[:, 'seasonal'])
    self.residual = np.asarray(df.loc[:, 'residual'])

    prediction, _, _ = self.make_prediction(steps)
    return prediction[0]
def fit(self, *, inner_iter=None, outer_iter=None, fit_kwargs=None):
    """
    Estimate STL and forecasting model parameters.

    Parameters
    ----------\n%(fit_params)s
    fit_kwargs : Dict[str, Any]
        Any additional keyword arguments to pass to ``model``'s ``fit``
        method when estimating the model on the decomposed residuals.

    Returns
    -------
    STLForecastResults
        Results with forecasting methods.
    """
    extra_fit_args = fit_kwargs if fit_kwargs is not None else {}

    decomposer = STL(self._endog, **self._stl_kwargs)
    decomposition: DecomposeResult = decomposer.fit(
        inner_iter=inner_iter, outer_iter=outer_iter
    )

    # The forecasting model is estimated on trend + residual, i.e. the
    # series with the seasonal component taken out.
    deseasonalized = decomposition.trend + decomposition.resid
    forecaster = self._model(deseasonalized, **self._model_kwargs)
    fitted = forecaster.fit(**extra_fit_args)

    if hasattr(fitted, "forecast"):
        return STLForecastResults(
            decomposer, decomposition, forecaster, fitted, self._endog
        )
    raise AttributeError(
        "The model's result must expose a ``forecast`` method.")
def extract_climate_trend(self, df, trend='STL'):
    '''
    Extract the STL trend component of every column of ``df``.

    input_params:
        df: input dataframe from which the trends are extracted.
            Requirements for the dataframe:
            - dataframe index needs to be datetime,
            - datetime index should be sorted,
            - should be a monthly resampling (STL is run with period=12)
        trend: only 'STL' is handled; any other value returns an empty
            DataFrame.

    returns:
        DataFrame with one column per column of ``df`` holding that
        column's robust STL trend.
    '''
    climate_trend_df = pd.DataFrame()
    if trend != 'STL':
        return climate_trend_df

    years = df.index.year
    # Seasonal smoother length = number of years spanned, forced to be odd.
    seasons = years[-1] - years[0]
    if seasons % 2 == 0:
        seasons += 1
    # STL rejects a seasonal smoother shorter than 3, which a span of less
    # than two years would otherwise produce.
    seasons = max(seasons, 3)

    for col in df:
        fitted = STL(df[col], period=12, seasonal=seasons, robust=True).fit()
        climate_trend_df[col] = fitted.trend
    return climate_trend_df
def decompose_ts(self, df, label, freq='W'):
    """
    Decompose the resampled task counts and save the decomposition plot.

    Parameters
    ----------
    df : DataFrame with ``TaskDate`` and ``TaskCount`` columns.
    label : str used in the printed message and in the output file name.
    freq : resampling rule passed to ``resample``; also stored on
        ``self.freq``.

    The additive ``seasonal_decompose`` is tried first; if it raises, STL is
    used instead. The figure is written to
    ``<self.path>/<self.report_img>/decompose/decompose_<label>_<freq>.png``.
    """
    self.freq = freq
    ts = df[['TaskDate', 'TaskCount']].set_index('TaskDate').resample(freq).sum()
    sns.set(rc={"figure.figsize": (10, 8)})
    print(f"{freq} decomposition of {label}")

    # Only the decomposition itself is guarded, so plotting/saving errors
    # surface directly instead of silently triggering the fallback.
    try:
        result = seasonal_decompose(ts, model='additive')
    except Exception as exc:
        # Best-effort fallback, but no longer swallows KeyboardInterrupt /
        # SystemExit and no longer hides the reason.
        print(f"seasonal_decompose failed ({exc}); falling back to STL")
        result = STL(ts).fit()

    fig = result.plot()
    target = os.path.join(self.path, self.report_img, "decompose",
                          "decompose_" + label + "_" + self.freq + ".png")
    fig.savefig(target)
    plt.close(fig)
def v_seasonality(datos):
    """
    Visualise the seasonality check by plotting the STL decomposition of
    the differenced (already stationary) data.

    Parameters
    ----------
    datos : pd.DataFrame
        Information contained in the file that was read.
        NOTE(review): the argument is immediately replaced by a fresh read
        of the Excel file below, so the value passed in is never used --
        confirm whether that is intended.

    Returns
    -------
    None. Displays the figure produced by the STL result's ``plot``.
    """
    datos = fn.f_leer_archivo(
        param_archivo='archivos/FedInterestRateDecision-UnitedStates.xlsx',
        sheet_name=0,
    )
    indexed = datos.set_index('datetime')

    # First difference, dropping the rows left empty by the shift.
    differenced = (indexed - indexed.shift()).dropna()

    # Monthly mean of the 'actual' column, forward-filling empty months.
    monthly = differenced['actual'].resample('M').mean().ffill()

    STL(monthly).fit().plot()
    plt.show()
def decompostion_STL(series, period=None, title=''):
    """
    Fit a robust STL decomposition of ``series`` and show its plot.

    ``period`` is forwarded to STL unchanged and ``title`` is drawn as text
    on the resulting figure. Nothing is returned.
    """
    from statsmodels.tsa.seasonal import STL

    figure = STL(series, period=period, robust=True).fit().plot()
    figure.text(0.1, 0.95, title, size=15, color='purple')
    plt.show()
def test_short_class(default_kwargs_short):
    """The STL fit must match the stored 'short' reference results."""
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs_short)
    fitted = STL(**class_kwargs).fit(outer_iter=outer, inner_iter=inner)
    reference = results.loc['short'].sort_index()

    comparisons = (
        (fitted.seasonal, reference.season),
        (fitted.trend, reference.trend),
        (fitted.weights, reference.rw),
    )
    for actual, desired in comparisons:
        assert_allclose(actual, desired)
def test_pickle(default_kwargs):
    """A pickled-and-restored STL model fits identically and keeps its config."""
    class_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    original = STL(**class_kwargs)
    first_fit = original.fit()

    restored = pickle.loads(pickle.dumps(original))
    second_fit = restored.fit()

    assert_allclose(first_fit.trend, second_fit.trend)
    assert_allclose(first_fit.seasonal, second_fit.seasonal)
    assert original.config == restored.config
def test_ntjump_1_class(default_kwargs):
    """With 'ntjump' set to 1 the fit must match the 'ntjump-1' reference."""
    default_kwargs['ntjump'] = 1
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs)
    fitted = STL(**class_kwargs).fit(outer_iter=outer, inner_iter=inner)
    reference = results.loc['ntjump-1'].sort_index()

    comparisons = (
        (fitted.seasonal, reference.season),
        (fitted.trend, reference.trend),
        (fitted.weights, reference.rw),
    )
    for actual, desired in comparisons:
        assert_allclose(actual, desired)
def test_pandas(default_kwargs, robust):
    """Every component of a fit on a pandas Series is itself a Series."""
    class_kwargs, _, _ = _to_class_kwargs(default_kwargs, robust)
    series = pd.Series(class_kwargs['endog'], name='y')
    res = STL(endog=series, period=class_kwargs['period']).fit()

    for component in ('trend', 'seasonal', 'resid', 'weights'):
        assert isinstance(getattr(res, component), pd.Series)
def plot_time_trend(df, name):
    """
    Build a four-row figure with the STL decomposition of the "Close" column.

    Only rows whose index year is 2006 or later are decomposed (period=250,
    seasonal=21, robust). The marker colour is ``HOSE_COLOR`` when ``name``
    is "VN-INDEX" and ``HNX_COLOR`` otherwise. Returns the figure.
    """
    marker_color = HOSE_COLOR if name == "VN-INDEX" else HNX_COLOR

    closes = df[df.index.year >= 2006]["Close"]
    res = STL(closes, period=250, seasonal=21, robust=True).fit()

    # (component, trace name, y-axis title), one entry per subplot row.
    panels = (
        (res.observed, "Orignal Index", "Orginal"),
        (res.trend, "Trend", "Trend"),
        (res.seasonal, "Season", "Seasonal"),
        (res.resid, "Resid", "Residuals"),
    )

    fig = make_subplots(shared_xaxes=True, rows=4, cols=1)
    for row, (component, trace_name, _) in enumerate(panels, start=1):
        trace = go.Scatter(
            y=component,
            x=component.index,
            name=trace_name,
            showlegend=False,
            marker_color=marker_color,
        )
        fig.add_trace(trace, row=row, col=1)

    # Y-axis titles, one per row.
    for row, (_, _, axis_title) in enumerate(panels, start=1):
        fig.update_yaxes(title_text=axis_title, row=row, col=1)

    fig.update_layout(title=f"Seasonal-Trend Decomposition of {name}",
                      height=500)
    return fig
def test_baseline_class(default_kwargs):
    """The default fit must match the 'baseline' reference, residual included."""
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs)
    fitted = STL(**class_kwargs).fit(outer_iter=outer, inner_iter=inner)
    reference = results.loc['baseline'].sort_index()

    assert_allclose(fitted.trend, reference.trend)
    assert_allclose(fitted.seasonal, reference.season)
    assert_allclose(fitted.weights, reference.rw)

    # The residual is whatever remains of the input after removing the
    # reference trend and seasonal components.
    expected_resid = class_kwargs['endog'] - reference.trend - reference.season
    assert_allclose(fitted.resid, expected_resid)
def test_parameter_checks_seasonal(default_kwargs):
    """An even, a negative and a float ``seasonal`` are each rejected."""
    class_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    endog = class_kwargs['endog']
    period = class_kwargs['period']
    match = 'seasonal must be an odd positive integer >= 3'

    for bad_seasonal in (2, -7, 13.0):
        with pytest.raises(ValueError, match=match):
            STL(endog=endog, period=period, seasonal=bad_seasonal)
def n_sigma(data):
    """
    Decompose the statsmodels CO2 dataset with STL and show the plot.

    NOTE(review): ``data`` is replaced by the CO2 dataset on the first
    statement, so the argument passed in is never used -- confirm intent.

    Prints the head of the raw data, the type of the monthly data and the
    lengths of the monthly data and of each STL component.
    """
    from statsmodels.datasets import co2

    data = co2.load(True).data
    print(data.head())

    # Monthly means, forward-filling months that have no observation.
    monthly = data.resample('M').mean().ffill()
    res = STL(monthly).fit()

    print(type(monthly))
    print(len(monthly), len(res.resid), len(res.trend), len(res.seasonal))
    res.plot()
    plt.show()
def testStationarity(df, keywords):
    """
    Run ``adfuller`` on the STL seasonal component of each keyword's series.

    For every keyword the series comes from ``readData(df, keyword)``. The
    keyword and the second element of the ``adfuller`` result are printed
    whenever that element exceeds 0.05.
    """
    for keyword in keywords:
        seasonal_part = STL(readData(df, keyword), seasonal=13).fit().seasonal
        p_value = adfuller(seasonal_part)[1]
        if p_value > 0.05:
            print(keyword, p_value)
def test_parameter_checks_trend(default_kwargs):
    """Invalid ``trend`` values (14, 11, -19, 19.0) are each rejected."""
    class_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    endog = class_kwargs['endog']
    period = class_kwargs['period']
    match = 'trend must be an odd positive integer >= 3 where trend > period'

    for bad_trend in (14, 11, -19, 19.0):
        with pytest.raises(ValueError, match=match):
            STL(endog=endog, period=period, trend=bad_trend)
def STL_decomposition(df, column, year):
    """
    Robust STL decomposition (period=29) of one column for a single year.

    Rows whose ``date_c`` falls in ``year`` are sorted by date, averaged per
    day and time-interpolated before the fit. Prints the mean of the trend
    and returns the fitted STL result.
    """
    in_year = df[(df.date_c.dt.year == year)].sort_values(by="date_c")
    daily = in_year[["date_c", column]].resample("1D", on="date_c").mean()[[column]]
    series = daily.interpolate(method="time")[column]

    res = STL(series, period=29, robust=True).fit()
    print(f"Trend mean = {res.trend.mean()}", flush=True)
    return res
def get_ticker_stl(self, ticker, start_date="2015-01-01", end_date=None, period=None):
    """
    Fetch a ticker's data and fit an STL decomposition on its "Close" column.

    ``end_date`` defaults to today's date (``%Y-%m-%d``) when falsy. The
    frame returned by ``self.get_ticker_data`` gets a datetime index and is
    sorted in place. If fitting with ``period`` raises, the exception is
    logged as a warning and the fit is retried with ``period=5``.

    Returns ``(df, stl_result)``.
    """
    end_date = end_date or datetime.datetime.now().date().strftime("%Y-%m-%d")

    df = self.get_ticker_data(ticker, start_date, end_date)
    df.index = pd.to_datetime(df.index)
    df.sort_index(inplace=True)

    try:
        # Use the requested period first.
        decomposition = STL(df["Close"], period=period).fit()
    except Exception as err:
        # Fall back to a period of 5 on any failure.
        log.warning(err)
        decomposition = STL(df["Close"], period=5).fit()
    return df, decomposition
def predict(data, hyperparams):
    """
    Return the indices of the anomalies found in ``data``.

    With ``hyperparams['seasonality']`` truthy, the second column of ``data``
    (indexed by the first) is STL-decomposed with ``hyperparams['period']``
    and ``anom_detect().evaluate`` is run on the residuals. Otherwise DBSCAN
    (``eps``, ``min_pts``) is fitted on ``data`` and the positions labelled
    -1 are returned.
    """
    if not hyperparams['seasonality']:
        clustering = DBSCAN(eps=hyperparams['eps'],
                            min_samples=hyperparams['min_pts']).fit(data)
        return np.argwhere(clustering.labels_ == -1).flatten().tolist()

    series = pd.Series(data=list(data.iloc[:, 1]), index=list(data.iloc[:, 0]))
    resids = STL(series, period=hyperparams['period']).fit().resid.values
    residual_df = pd.DataFrame(data={'residuals': resids})
    anomalies = anom_detect().evaluate(residual_df, col_name='residuals')
    return list(anomalies.index)
def test_parameter_checks_period(default_kwargs):
    """Two-column input and invalid ``period`` values are rejected."""
    class_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    endog = class_kwargs['endog']
    period = class_kwargs['period']

    # Stacking the series next to itself gives a two-column array.
    two_columns = np.hstack((endog[:, None], endog[:, None]))
    with pytest.raises(ValueError, match='y must be a 1d array'):
        STL(endog=two_columns, period=period)

    match = 'period must be a positive integer >= 2'
    for bad_period in (1, -12, 4.0):
        with pytest.raises(ValueError, match=match):
            STL(endog=endog, period=bad_period)
def stl_decomposition(series, period=12):
    """
    Run a robust STL decomposition on a pandas Series.

    Parameters
    ----------
    series : Series object
        The observations to be deseasonalised.
    period : int (optional)
        Length of the seasonal period in observations.

    Returns
    -------
    The fitted STL result.
    """
    return STL(series, period=period, robust=True).fit()
def run(self, ds, y, period, points):
    """
    Decompose a series with STL and forecast it with Prophet.

    Parameters
    ----------
    ds : sequence of dates, or a one-element sequence holding a ticker
        symbol (financial mode: data is fetched via ``get_ticker_stl``).
    y : observations matching ``ds`` (ignored in financial mode).
    period : STL period; values above 300 are rejected.
    points : number of forecast points/days; values above 500 are rejected.

    Returns
    -------
    Whatever ``self.output_msg`` returns: either the rejection message or
    the observed/trend/seasonal components plus the forecast and its bounds.
    """
    if (period and period > 300) or (points and points > 500):
        # NOTE(review): the message says "max 3000" while the limits checked
        # above are 300 and 500 -- confirm which is intended.
        return self.output_msg(
            forecast_ds=["Too many forecast or period points! (max 3000)"])

    # Financial series: the single element of ds is the ticker.
    if len(ds) == 1:
        ticker = ds[0]
        df, stl = self.get_ticker_stl(ticker, period=period)
        df = df.reset_index()
        ds = df["Date"].values
        y = df["Close"].values
        financial = True
    else:
        try:
            # Try the requested period, fall back to 5 on failure.
            stl = STL(y, period=period).fit()
        except Exception as e:
            log.warning(e)
            stl = STL(y, period=5).fit()
        financial = False

    log.info("Forecasting...")

    # Prophet
    df = pd.DataFrame(data={"ds": ds, "y": y})
    df["ds"] = pd.to_datetime(df["ds"])
    m = Prophet()
    m.fit(df)

    if financial:
        # Extend the history with the NYSE calendar's valid days over the
        # next `points` calendar days.
        nyse = mcal.get_calendar('NYSE')
        start_date = datetime.datetime.today()
        end_date = start_date + datetime.timedelta(days=points)
        valid_days = nyse.valid_days(start_date=start_date, end_date=end_date)
        future = pd.DataFrame(data={"ds": [v.date() for v in valid_days]})
        # Series.append was removed in pandas 2.0; pd.concat is equivalent.
        future = pd.DataFrame(
            data={"ds": pd.concat([df["ds"], future["ds"]], ignore_index=True)})
    else:
        future = m.make_future_dataframe(periods=points)

    forecast = m.predict(future)
    forecast_df = [pd.to_datetime(dt).strftime('%Y-%m-%d')
                   for dt in forecast["ds"].values]

    return self.output_msg(observed=stl.observed,
                           trend=stl.trend,
                           seasonal=stl.seasonal,
                           forecast=forecast["yhat"].values,
                           forecast_ds=forecast_df,
                           forecast_lower=forecast["yhat_lower"].values,
                           forecast_upper=forecast["yhat_upper"].values)
def example_decomp_ts(df_SDF):
    """
    Plot moving-average, STL and robust-STL decompositions of one attribute.

    The rows of ``df_SDF`` with ``PortApp == 65805`` are de-duplicated on
    ``TimeSlot``, reindexed onto a 30-minute grid between the two dates
    below (missing slots filled with 0) and the ``s_nPacketDn`` column is
    decomposed three ways. One figure per method is saved as
    ``<method>.png`` in the working directory. Nothing is returned.
    """
    FONT_SIZE = 14
    att = 's_nPacketDn'
    port_app = 65805
    decompfreq = int(24*60/30*7)  # number of 30-minute slots in one week
    plot_start = 2000  # leading samples left out of every panel
    dates = ['2019-03-16', '2019-06-08']
    week_labels = ['2019/03/16', '2019/03/23', '2019/03/30', '2019/04/06',
                   '2019/04/13', '2019/04/20', '2019/04/27', '2019/05/04',
                   '2019/05/11', '2019/05/18', '2019/05/25', '2019/06/01',
                   '2019/06/08']

    df_SDF = df_SDF.sort_values('TimeSlot', axis=0)
    # Non-inplace drop: mutating a filtered slice in place triggers pandas'
    # SettingWithCopyWarning.
    temp = df_SDF[df_SDF.PortApp == port_app].drop_duplicates(subset='TimeSlot')
    temp = temp.set_index('TimeSlot')
    temp.index = pd.DatetimeIndex(temp.index)
    temp = temp.reindex(pd.date_range(*dates, freq='30min'), fill_value=0)
    temp = temp.loc[dates[0]:dates[1]]

    list_colors = ['#edf2fb', '#e2eafc', '#d7e3fc', '#ccdbfd']

    # Method name -> callable producing the decomposition result.
    decomposers = {
        'MA': lambda: seasonal_decompose(temp[att].values, period=decompfreq,
                                         model='additive', two_sided=False),
        'STL': lambda: STL(temp[att].values, period=decompfreq).fit(),
        'STL_robust': lambda: STL(temp[att].values, period=decompfreq,
                                  robust=True).fit(),
    }

    for method, run_decomposition in decomposers.items():
        result = run_decomposition()
        fig, ax = plt.subplots(4, figsize=(9, 9), dpi=400,
                               gridspec_kw={'wspace': 0, 'hspace': 0})

        panels = (
            (result.observed, 'Observed'),
            (result.trend, 'Trend'),
            (result.seasonal, 'Seasonal'),
            (result.resid, 'Residual'),
        )
        for axis, (component, label), color in zip(ax, panels, list_colors):
            axis.plot(component[plot_start:], c='black', label=label)
            axis.set_facecolor(color)

        # One tick per week on the bottom panel, labelled MM/DD.
        ax[3].set_xticks(range(0, len(result.observed[plot_start:]), decompfreq))
        ax[3].set_xticklabels([wk[5:] for wk in week_labels[-7:]],
                              fontsize=FONT_SIZE)
        ax[3].xaxis.set_tick_params(labelsize=FONT_SIZE)

        for axx in ax:
            axx.legend(fontsize=FONT_SIZE+4, loc='upper left')
            axx.yaxis.set_tick_params(labelsize=FONT_SIZE)
            axx.ticklabel_format(style='sci', axis='y', scilimits=(0, 0))

        fig.tight_layout()
        fig.savefig(method + '.png')
        plt.close(fig)
def test_plot(default_kwargs, close_figures):
    """
    ``plot`` runs on a fit of the keyword arguments as given, and again
    after ``endog`` has been wrapped in a named pandas Series.
    """
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs)
    first_result = STL(**class_kwargs).fit(outer_iter=outer, inner_iter=inner)
    first_result.plot()

    named_series = pd.Series(class_kwargs["endog"], name="CO2")
    class_kwargs["endog"] = named_series
    second_result = STL(**class_kwargs).fit()
    second_result.plot()
def test_plot(default_kwargs):
    """
    ``plot`` runs on a fit of the keyword arguments as given, and again
    after ``endog`` has been wrapped in a named pandas Series.
    """
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs)
    first_result = STL(**class_kwargs).fit(outer_iter=outer, inner_iter=inner)
    first_result.plot()

    named_series = pd.Series(class_kwargs['endog'], name='CO2')
    class_kwargs['endog'] = named_series
    second_result = STL(**class_kwargs).fit()
    second_result.plot()
def test_parameter_checks_low_pass(default_kwargs):
    """Invalid ``low_pass`` values (14, 7, -19, 19.0) are each rejected."""
    class_kwargs, _, _ = _to_class_kwargs(default_kwargs)
    endog = class_kwargs['endog']
    period = class_kwargs['period']
    match = ('low_pass must be an odd positive integer >= 3 where'
             ' low_pass > period')

    for bad_low_pass in (14, 7, -19, 19.0):
        with pytest.raises(ValueError, match=match):
            STL(endog=endog, period=period, low_pass=bad_low_pass)
def deseason(self, dframe, method='stl', doplot=False):
    """
    Compute and remove seasonal effects in the data.

    Parameters
    ----------
    dframe: pandas.DataFrame
        Pandas DataFrame with aggregations applied. Its columns are
        overwritten in place.
    method: str
        Method for removing seasonal variations in the data (matched
        case-insensitively). Acceptable values include:
        * `stl` : (Default) Use `statsmodels.tsa.seasonal.STL`; each column
          becomes the column minus its seasonal component
        * `x13` : Use US Census Bureau X-13ARIMA-SEATS software (see note
          2); each column becomes the trend of the analysis
        * `None`: Return the raw aggregated data
        Any other string leaves the columns untouched.
    doplot: bool
        When true, call ``plot`` on each column's result.

    Returns
    -------
    The same DataFrame object, with seasonal effects removed as best as
    possible.

    Notes
    -----
    1. It's best to supply as much data as possible to this method
    2. When using `method='x13'` the data must be aggregated either monthly
       (`agg='M'`) or quarterly (`agg='Q'`). This method also requires
       installing the X-13ARIMA-SEATS software and the `statsmodels`
       python module.
    """
    # Nothing to do when no method was requested.
    if method is None:
        return dframe

    for col in dframe.columns:
        chosen = method.lower()
        if chosen == 'x13':
            # Interface to the US Census Bureau seasonal adjustment software.
            results = x13_arima_analysis(dframe[col], trading=False)
            dframe[col] = results.trend
        elif chosen == 'stl':
            # Interface to 'statsmodels.tsa.seasonal.STL'.
            results = STL(dframe[col], robust=False, seasonal=3).fit()
            dframe[col] = dframe[col] - results.seasonal
        else:
            continue
        if doplot:
            results.plot()

    return dframe
def twitter_score(x, period=None, seasonal=45):
    '''
    Return the MAD decision-function values for the deseasonalised series.

    The input must be a Series with a time index. ``period`` is passed to
    STL only when it is not None; ``seasonal`` is always passed.
    '''
    # Filter out the seasonal component.
    stl_options = {"seasonal": seasonal}
    if period is not None:
        stl_options["period"] = period
    decomposition = STL(x, **stl_options).fit()

    # Residual = series minus its median minus the seasonal component.
    residual = x - np.nanmedian(x) - decomposition.seasonal

    # Look for outliers with MAD.
    detector = MAD()
    detector.fit(residual)
    return detector.decision_function(residual)
def decompose():
    """
    STL-decompose the California hourly demand data and plot its trend.

    Reads ``data/Demand_for_California_hourly_UTC_time.csv``, reverses the
    row order, keeps 2015-01-01 through 2019-12-31 and fits STL with
    seasonal=25 and period=501. Prints resid.var / (resid.var + trend.var)
    and resid.var / (resid.var + seasonal.var), then shows the trend plot.
    """
    frame = pd.read_csv('data/Demand_for_California_hourly_UTC_time.csv',
                        header=0, infer_datetime_format=True,
                        parse_dates=[0], index_col=[0])
    # Reverse the rows (presumably the file is stored newest-first -- confirm).
    frame = frame.reindex(index=frame.index[::-1])
    frame.index.freq = 'H'  # Hourly data.
    frame = frame.loc['2015-01-1' : '2019-12-31']

    fitted = STL(frame, seasonal=25, period=501).fit()
    trend_part = fitted.trend
    seasonal_part = fitted.seasonal
    remainder = fitted.resid

    for component in (trend_part, seasonal_part):
        ratio = remainder.var() / (remainder.var() + component.var())
        print(str(ratio))

    trend_part.plot()
    # Alternative explored earlier: seasonal_decompose on the
    # 'Electricity Demand in the State of California' column (additive or
    # multiplicative) with period=24, 24 * 7 or 24 * 7 * 52.
    plt.show()