def predict_past(self, df, freq_period, steps):
    """Scale ``df["y"]``, decompose it with STL and forecast ``steps`` ahead.

    The method fits a scaler on the ``"y"`` column, saves it to
    ``<self.directory>/scaler_pred.sav``, runs an STL decomposition on the
    scaled values, shows the decomposition plot, stores the components on
    ``self`` and finally delegates to ``self.make_prediction``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a column ``"y"``. The columns ``trend``, ``seasonal``
        and ``residual`` are written into this frame in place.
    freq_period : int
        Base seasonal period; see the note on the STL call below for how it
        is adjusted before use.
    steps
        Forwarded unchanged to ``self.make_prediction``.

    Returns
    -------
    The first element of the first value returned by
    ``self.make_prediction(steps)``.

    Side effects
    ------------
    Sets ``self.scaler2``, ``self.trend``, ``self.seasonal`` and
    ``self.residual``; overwrites the scaler file on disk; opens a
    matplotlib window via ``plt.show()``.
    """
    scalerfile = self.directory + '/scaler_pred.sav'
    values = np.reshape(np.array(df["y"]), (-1, 1))

    # NOTE(review): the original guard was
    # ``not os.path.isfile(scalerfile) or os.path.isfile(scalerfile)``, which
    # is always true, so its ``elif`` branch (load a previously pickled
    # scaler) could never execute. The effective behaviour - refit and
    # re-save the scaler on every call - is kept here; confirm whether
    # re-using the saved scaler was the real intent.
    if (df["y"].max() - df["y"].min()) > 100:
        if self.verbose == 1:
            print("PowerTransformation scaler used")
        scaler = PowerTransformer()
    else:
        if self.verbose == 1:
            print("Identity scaler used")
        scaler = IdentityTransformer()

    self.scaler2 = scaler.fit(values)
    Y = self.scaler2.transform(values)
    # Context manager so the file handle is closed even if pickling fails.
    with open(scalerfile, 'wb') as handle:
        pickle.dump(self.scaler2, handle)

    # An even period is bumped to the next odd number, and STL then receives
    # that value plus one.
    # NOTE(review): for integer input the period handed to STL is therefore
    # always even - confirm this double adjustment is intended.
    if freq_period % 2 == 0:
        freq_period = freq_period + 1
    decomposition = STL(Y, period=freq_period + 1).fit()
    decomposition.plot()
    plt.show()

    df.loc[:, 'trend'] = decomposition.trend
    df.loc[:, 'seasonal'] = decomposition.seasonal
    df.loc[:, 'residual'] = decomposition.resid
    # ``fillna(method="bfill")`` is deprecated in recent pandas; ``bfill()``
    # is the equivalent call. Like the original, this rebinds the local name
    # only, so the caller's frame is not back-filled.
    df = df.bfill()

    self.trend = np.asarray(df.loc[:, 'trend'])
    self.seasonal = np.asarray(df.loc[:, 'seasonal'])
    self.residual = np.asarray(df.loc[:, 'residual'])

    prediction, _, _ = self.make_prediction(steps)
    return prediction[0]
def test_plot(default_kwargs):
    """Smoke-test ``plot()`` on STL results for raw and named-Series input.

    The first fit uses the iteration counts extracted from the fixture; the
    second fit wraps ``endog`` in a ``pandas.Series`` named ``'CO2'`` and
    uses the default ``fit()`` arguments. The test passes if neither
    ``plot()`` call raises.
    """
    stl_kwargs, n_outer, n_inner = _to_class_kwargs(default_kwargs)

    # Plot a fit that uses the explicit iteration counts.
    fitted = STL(**stl_kwargs).fit(outer_iter=n_outer, inner_iter=n_inner)
    fitted.plot()

    # Plot again with the same data supplied as a named Series.
    named_endog = pd.Series(stl_kwargs['endog'], name='CO2')
    stl_kwargs['endog'] = named_endog
    STL(**stl_kwargs).fit().plot()
def test_plot(default_kwargs, close_figures):
    """Smoke-test ``plot()`` on STL results for raw and named-Series input.

    ``close_figures`` is requested only as a fixture; it is not referenced
    in the body. The test passes if neither ``plot()`` call raises.
    """
    stl_kwargs, n_outer, n_inner = _to_class_kwargs(default_kwargs)

    # Plot a fit that uses the explicit iteration counts.
    fitted = STL(**stl_kwargs).fit(outer_iter=n_outer, inner_iter=n_inner)
    fitted.plot()

    # Plot again with the same data supplied as a Series named "CO2".
    named_endog = pd.Series(stl_kwargs["endog"], name="CO2")
    stl_kwargs["endog"] = named_endog
    STL(**stl_kwargs).fit().plot()
def n_sigma(data):
    """Decompose the statsmodels CO2 sample dataset with STL and plot it.

    Parameters
    ----------
    data
        Ignored. The name is immediately rebound to the CO2 dataset loaded
        from statsmodels, so whatever the caller passes has no effect.

    Returns
    -------
    None. Prints the head of the data, its type and the lengths of the
    series and of the three STL components, then shows the decomposition
    plot.
    """
    from statsmodels.datasets import co2

    # ``load_pandas()`` returns the pandas-backed dataset on both old and
    # new statsmodels; ``load(True)`` relied on a positional ``as_pandas``
    # flag that newer releases of ``load()`` no longer accept.
    data = co2.load_pandas().data
    print(data.head())

    # Aggregate to monthly means and forward-fill the gaps before fitting.
    data = data.resample('M').mean().ffill()
    res = STL(data).fit()

    print(type(data))
    print(len(data), len(res.resid), len(res.trend), len(res.seasonal))
    res.plot()
    plt.show()
def deseason(self, dframe, method='stl', doplot=False):
    """
    Compute and remove seasonal effects in the data.

    Parameters
    ----------
    dframe: pandas.DataFrame
        Pandas DataFrame with aggregations applied. Its columns are
        overwritten in place with the adjusted values.
    method: str
        Method for removing seasonal variations in the data (matched
        case-insensitively). Acceptable values include:

        * `stl` : (Default) Use `statsmodels.tsa.seasonal.STL` method
        * `x13` : Use US Census Bureau X-13ARIMA-SEATS software (see note 2)
        * `None`: Return the raw aggregated data
    doplot: bool
        When true, call ``plot()`` on each column's decomposition result.

    Returns
    -------
    The same Pandas.DataFrame object that was passed in, with seasonal
    effects removed as best as possible.

    Raises
    ------
    ValueError
        If `method` is a string other than `stl` or `x13`.

    Notes
    -----
    1. It's best to supply as much data as possible to this method
    2. When using `method='x13'` the data must be aggregated either
       monthly (`agg='M'`) or quarterly (`agg='Q'`). This method also
       requires installing the X-13ARIMA-SEATS software and the
       `statsmodels` python module.
    """
    # Do nothing if method is None
    if method is None:
        return dframe

    # Normalise once rather than per column, and reject unknown names up
    # front: previously an unrecognised method fell through both branches
    # and the raw data came back looking as if it had been adjusted.
    method_key = method.lower()
    if method_key not in ('stl', 'x13'):
        raise ValueError(
            f"Unsupported deseason method {method!r}; "
            "expected 'stl', 'x13' or None"
        )

    # Remove seasonal effects in the data, one column at a time
    for col in dframe.columns:
        if method_key == 'x13':
            # Interface to the US Census Bureau seasonal adjustment software;
            # the column is replaced by the estimated trend.
            results = x13_arima_analysis(dframe[col], trading=False)
            dframe[col] = results.trend
        else:
            # Interface to 'statsmodels.tsa.seasonal.STL'; the seasonal
            # component is subtracted from the original column.
            results = STL(dframe[col], robust=False, seasonal=3).fit()
            dframe[col] = dframe[col] - results.seasonal
        if doplot:
            results.plot()

    return dframe
def v_seasonality(datos):
    """
    Visual seasonality check: plot the STL decomposition of the
    first-differenced series.

    Parameters
    ----------
    datos : pd.DataFrame : nominally the information read from the input
        file. NOTE: the argument is overwritten straight away with the
        contents of the Excel file read below, so the passed value is
        not used.

    Returns
    -------
    None. Displays the figure produced by the STL result's ``plot()``
    for the monthly-averaged, differenced ``'actual'`` column.
    """
    # Load the file and index it by its 'datetime' column.
    datos = fn.f_leer_archivo(
        param_archivo='archivos/FedInterestRateDecision-UnitedStates.xlsx',
        sheet_name=0,
    ).set_index('datetime')

    # First difference; the leading row (no predecessor) becomes NaN and
    # is dropped.
    diferencias = (datos - datos.shift()).dropna()

    # Monthly means, forward-filling months without observations.
    mensual = diferencias['actual'].resample('M').mean().ffill()

    STL(mensual).fit().plot()
    plt.show()
def decompose_ts(self, df, label, freq='W'):
    """Resample task counts, decompose the series and save the plot as PNG.

    The ``TaskCount`` column is summed per ``freq`` bucket of ``TaskDate``.
    An additive ``seasonal_decompose`` is tried first; if it raises, STL is
    used instead. The resulting figure is written to
    ``<self.path>/<self.report_img>/decompose/decompose_<label>_<freq>.png``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``TaskDate`` and ``TaskCount``.
    label : str
        Used in the printed message and in the output file name.
    freq : str
        Resampling rule passed to ``resample``; also stored on ``self.freq``.

    Returns
    -------
    None

    Notes
    -----
    Only the decomposition itself is guarded. A failure while plotting or
    saving now propagates instead of triggering the STL fallback (which
    would have attempted the same save again), and the figure is closed
    either way.
    """
    self.freq = freq
    ts = df[['TaskDate', 'TaskCount']].set_index('TaskDate').resample(freq).sum()
    sns.set(rc={"figure.figsize": (10, 8)})
    print(f"{freq} decomposition of {label}")

    try:
        # Decomposition 1: classical additive decomposition
        # (``model`` may be 'additive' or 'multiplicative').
        result = seasonal_decompose(ts, model='additive')
    except Exception:
        # Fall back to STL. ``Exception`` rather than a bare ``except`` so
        # KeyboardInterrupt / SystemExit are not swallowed.
        result = STL(ts).fit()

    fig = result.plot()
    try:
        fig.savefig(
            os.path.join(self.path, self.report_img, "decompose",
                         "decompose_" + label + "_" + self.freq + ".png"))
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)
def analysis(file_name="", ds_col="Date", target_col="Price", points=365,
             stl_period=5, invert=True, normalize=False, norm_log=False):
    """Run an STL decomposition and a Prophet forecast on a CSV time series.

    Parameters
    ----------
    file_name : str
        Path of the CSV file. When empty, the path is requested
        interactively with the prompt ``"CSV File: "``.
    ds_col : str
        Name of the date column (parsed with ``pd.to_datetime``).
    target_col : str
        Name of the value column.
    points : int
        Number of future periods passed to ``make_future_dataframe``.
    stl_period : int
        ``period`` argument for STL.
    invert : bool
        Reverse the row order after loading.
    normalize : bool
        Divide the target by its maximum and store it as float32.
    norm_log : bool
        Apply ``np.log`` to the target; only considered when ``normalize``
        is false.

    Returns
    -------
    None. Three PNG files are written to the working directory:
    ``<name>_STL.png``, ``<name>_forecast.png`` and
    ``<name>_forecast_components.png``, where ``<name>`` is the last
    ``/``-separated part of the path with ``".csv"`` removed.
    """
    csv_path = file_name or input("CSV File: ")
    frame = pd.read_csv(csv_path)
    if invert:
        frame = frame.iloc[::-1]
    frame[ds_col] = pd.to_datetime(frame[ds_col])

    if normalize:
        # Scale by the largest value, element by element, then store as
        # float32.
        peak = max(frame[target_col])
        frame[target_col] = np.array(
            [value / peak for value in frame[target_col]],
            dtype=np.float32,
        )
    elif norm_log:
        frame[target_col] = np.log(frame[target_col])

    # STL on the target column, indexed by date
    stl_input = frame[[target_col]]
    stl_input.index = frame[ds_col]
    stl = STL(stl_input, period=stl_period).fit()

    # Prophet expects the columns to be called "ds" and "y"
    prophet_frame = (
        frame.reset_index()[[ds_col, target_col]]
        .rename(columns={ds_col: "ds", target_col: "y"})
    )
    m = Prophet()
    m.fit(prophet_frame)
    forecast = m.predict(m.make_future_dataframe(periods=points))

    # Plotting Charts
    name = csv_path.rsplit("/", 1)[-1].replace(".csv", "")
    stl.plot()
    # Saves matplotlib's current figure, i.e. the one just created by
    # ``stl.plot()``.
    plt.savefig("{}_STL.png".format(name))
    m.plot(forecast).savefig("{}_forecast.png".format(name))
    m.plot_components(forecast).savefig(
        "{}_forecast_components.png".format(name))
# Script fragment: classical decomposition of ``Global_active_power``,
# followed by an HP filter and an STL decomposition of ``series`` and a small
# date-arithmetic example. ``data``, ``series`` and ``start_date`` are not
# defined in this fragment and must already exist when it runs.

# Parse the timestamp column and use it as the index.
data['Datetime'] = pd.to_datetime(data["Datetime"])
data = data.set_index('Datetime')
# data.drop(['Datetime'], axis=1, inplace=True)
# data.head()

# Global default figure size (width, height in inches) for subsequent plots.
rcParams['figure.figsize'] = 30, 10

# Additive seasonal decomposition of the power series.
decomposition = sm.tsa.seasonal_decompose(data['Global_active_power'], model='additive')
fig = decomposition.plot()

# Bare expressions: their values are discarded when run as a script
# (presumably left over from an interactive session).
dir(decomposition)
decomposition.observed
decomposition.trend.tail()
decomposition.seasonal.tail()
decomposition.resid.tail()

# Hodrick-Prescott filter on ``series``; 50 is passed as the second
# positional argument (the smoothing parameter).
cycle, trend = sm.tsa.filters.hpfilter(series, 50)

from statsmodels.tsa.seasonal import STL

# STL decomposition of the same ``series`` and its plot.
result = STL(series).fit()
chart = result.plot()
plt.show()

import datetime

# Then you'll have, using datetime.timedelta:
# ``start_date`` is parsed as month/day/two-digit-year; ``end_date`` is ten
# days later.
date_1 = datetime.datetime.strptime(start_date, "%m/%d/%y")
end_date = date_1 + datetime.timedelta(days=10)
# plot autocorrelation function to decide lagging # fig = plt.figure(figsize=(11, 10)) # ax = fig.add_subplot(211) # plot_acf(yi, lags=50, ax=ax) # ax2 = fig.add_subplot(212) # plot_pacf(yi, lags=50, method='ols',ax=ax2) # plt.show() ## transform data: boxcox, deseasonalize, detrend # boxcox to achieve stationarity in variance y_trans, lam = boxcox(yi.values.flatten()) y_trans = pd.Series(y_trans, index=yi.index) results = STL(y_trans).fit() results.plot() plt.show() # deseasonal, detrend: y_dd = results.resid ###Predict using Holt Winter’s Exponential Smoothing (HWES), time series with trend and seasonal component # a manual split n_test = int(0.2 * len(y_trans)) train, test = y_trans[:-n_test], y_dd[-n_test:] model = ExponentialSmoothing(train, trend='add', seasonal='add', seasonal_periods=30,
# Script fragment: decomposes the demand of a single item id, then builds a
# row-by-row correlation matrix of the sales table. ``training_set_df``,
# ``testing_set_df`` and the two ``*_PATH_str`` names are defined outside this
# fragment. The trailing nested loop is cut off in the visible source and is
# reproduced exactly as found.

testing_set_df["date"] = pd.to_datetime(testing_set_df["date"])

# Select the rows of one item from the training and testing frames.
df = training_set_df.loc[training_set_df["id"] == "FOODS_2_360_WI_2_validation"]
df2 = testing_set_df.loc[testing_set_df["id"] == "FOODS_2_360_WI_2_validation"]
# NOTE(review): these frames come from a boolean ``.loc`` filter and are then
# modified in place; pandas may emit SettingWithCopyWarning here - confirm.
df.set_index("date", inplace = True)
df2.set_index("date", inplace = True)

# Hodrick-Prescott filter on the demand series.
cycle, trend = sm.tsa.filters.hpfilter(df["demand"], lamb = 6.25)  # Annual lambda

# Collect demand plus the two HP components in one frame and plot them.
gdp_decomp = df[["demand"]]
gdp_decomp["cycle"] = cycle
gdp_decomp["trend"] = trend
gdp_decomp.plot.line()
plt.show()

# STL decomposition of the same demand series.
result = STL(df["demand"]).fit()
result.plot()
plt.show()

sell_prices = pd.read_csv(SELL_PRICES_PATH_str)
stv = pd.read_csv(SALES_TRAIN_PATH_str)
# Drop the descriptive columns and index by "id" before computing
# correlations on what remains.
stv.drop(["item_id", "dept_id", "cat_id", "store_id", "state_id"], axis = 1, inplace = True)
stv.set_index("id", inplace = True)

# ``np.corrcoef`` treats each row as one variable by default, so the result
# is an id-by-id matrix; it is relabelled with the ids on both axes.
cor_mat = np.corrcoef(stv)
cor_mat = pd.DataFrame(cor_mat, index = stv.index, columns = stv.index.tolist())
abs_cor_mat = np.abs(cor_mat)

count = 0
# Visit each unordered pair once (strict upper triangle) and test whether
# its absolute correlation exceeds 0.8; the action taken is not visible here.
for i in range(abs_cor_mat.shape[0]):
    for j in range(i + 1, abs_cor_mat.shape[0]):
        if abs_cor_mat.iloc[i, j] > 0.8:
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from statsmodels.datasets import co2
from statsmodels.tsa.seasonal import STL

# Example script: STL decomposition of the statsmodels CO2 sample dataset.

# Register pandas' date/time converters with matplotlib before plotting.
register_matplotlib_converters()

# ``load_pandas()`` returns the pandas-backed dataset on both old and new
# statsmodels; ``load(True)`` relied on a positional ``as_pandas`` flag that
# newer releases of ``load()`` no longer accept.
data = co2.load_pandas().data

# Aggregate to monthly means and forward-fill gaps so STL gets a regular,
# complete series.
data = data.resample('M').mean().ffill()

res = STL(data).fit()
res.plot()
plt.show()
from statsmodels.tsa.seasonal import STL
from sklearn import preprocessing

scaler = preprocessing.MinMaxScaler()


def add_stl_plot(fig, res, legend):
    """Add plots from additional STL fits

    Parameters
    ----------
    fig : matplotlib figure
        Figure whose axes are, in order, the observed / trend / seasonal /
        resid panels of an existing decomposition plot.
    res : iterable
        Fitted decomposition results; each must expose ``observed``,
        ``trend``, ``seasonal`` and ``resid``.
    legend : list of str
        Labels handed to ``ax.legend`` on the ``observed`` panel.
    """
    axs = fig.get_axes()
    comps = ['observed', 'trend', 'seasonal', 'resid']
    for ax, comp in zip(axs, comps):
        for r in res:
            series = getattr(r, comp)
            if comp == 'resid':
                # Residuals are drawn as unconnected markers.
                ax.plot(series, marker='o', linestyle='none')
            else:
                ax.plot(series)
                if comp == 'observed':
                    ax.legend(legend, frameon=False)


# Total units per period for each product. ``df1`` and ``df2`` are defined
# outside this fragment.
df1_units_sum = df1.groupby('period')['units'].sum()
df2_units_sum = df2.groupby('period')['units'].sum()

# The result's ``plot()`` creates its own figure, so no figure is opened
# beforehand (a preceding ``plt.figure(...)`` only left an empty extra
# figure behind); the size is applied to the real figure at the end.
res1_units = STL(df1_units_sum, robust=True).fit()
fig = res1_units.plot()

# Overlay the second product's decomposition on the same axes.
res2_units = STL(df2_units_sum, robust=True).fit()
add_stl_plot(fig, [res2_units], ['product 1', 'product 2'])
fig.set_size_inches(12,12)