Example #1
 def predict_past(self, df, freq_period, steps):
     scalerfile = self.directory + '/scaler_pred.sav'
     if not os.path.isfile(scalerfile):
         if (df["y"].max() - df["y"].min()) > 100:
             if self.verbose == 1:
                 print("PowerTransformation scaler used")
             scaler = PowerTransformer()
         else:
             if self.verbose == 1:
                 print("Identity scaler used")
             scaler = IdentityTransformer()
         self.scaler2 = scaler.fit(np.reshape(np.array(df["y"]), (-1, 1)))
         Y = self.scaler2.transform(np.reshape(np.array(df["y"]), (-1, 1)))
         pickle.dump(self.scaler2, open(scalerfile, 'wb'))
     elif os.path.isfile(scalerfile):
         self.scaler2 = pickle.load(open(scalerfile, "rb"))
         Y = self.scaler2.transform(np.reshape(np.array(df["y"]), (-1, 1)))
     if freq_period % 2 == 0:
         freq_period = freq_period + 1
     decomposition = STL(Y, period=freq_period + 1)
     decomposition = decomposition.fit()
     decomposition.plot()
     plt.show()
     df.loc[:, 'trend'] = decomposition.trend
     df.loc[:, 'seasonal'] = decomposition.seasonal
     df.loc[:, 'residual'] = decomposition.resid
     df = df.fillna(method="bfill")
     self.trend = np.asarray(df.loc[:, 'trend'])
     self.seasonal = np.asarray(df.loc[:, 'seasonal'])
     self.residual = np.asarray(df.loc[:, 'residual'])
     prediction, _, _ = self.make_prediction(steps)
     return prediction[0]
Example #2
def test_plot(default_kwargs):
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs)
    res = STL(**class_kwargs).fit(outer_iter=outer, inner_iter=inner)
    res.plot()

    class_kwargs['endog'] = pd.Series(class_kwargs['endog'], name='CO2')
    res = STL(**class_kwargs).fit()
    res.plot()
Example #3
def test_plot(default_kwargs, close_figures):
    class_kwargs, outer, inner = _to_class_kwargs(default_kwargs)
    res = STL(**class_kwargs).fit(outer_iter=outer, inner_iter=inner)
    res.plot()

    class_kwargs["endog"] = pd.Series(class_kwargs["endog"], name="CO2")
    res = STL(**class_kwargs).fit()
    res.plot()
Example #4
def n_sigma(data):
    from statsmodels.datasets import co2
    data = co2.load_pandas().data
    print(data.head())
    data_len = len(data)
    data = data.resample('M').mean().ffill()
    res = STL(data).fit()
    print(type(data))
    print(len(data), len(res.resid), len(res.trend), len(res.seasonal))
    res.plot()
    plt.show()
Example #5
    def deseason(self, dframe, method='stl', doplot=False):
        """ Compute and remove seasonal effects in the data.

        Parameters
        ----------
        dframe: pandas.DataFrame
            Pandas DataFrame with aggregations applied
        method: str
            Method for removing seasonal variations in the data. Acceptable
            values include: 
            * `stl` : (Default) Use `statsmodels.tsa.seasonal.STL` method
            * `x13` : Use US Census Bureau X-13ARIMA-SEATS software (see note 2)
            * `None`: Return the raw aggregated data
        doplot: bool
            If True, plot the decomposition for each column

        Returns
        -------
        pandas.DataFrame with seasonal effects removed as well as possible

        Notes
        -----
        1. It's best to supply as much data as possible to this method
        2. When using `method='x13'` the data must be aggregated either monthly
           (`agg='M'`) or quarterly (`agg='Q'`). This method also requires 
           installing the X-13ARIMA-SEATS software and the `statsmodels` python
           module.
        """
        # Do nothing if method is None
        if method is None:
            return dframe

        # Remove seasonal effects from the data
        for col in dframe.columns:
            
            # Interface to the US Census Bureau seasonal adjustment software
            if method.lower() == 'x13':
                results = x13_arima_analysis(dframe[col], trading=False)
                dframe[col] = results.trend
                if doplot:
                    results.plot()
            # Interface to 'statsmodels.tsa.seasonal.STL'
            elif method.lower() == 'stl':
                results = STL(dframe[col], robust=False, seasonal=3).fit()
                dframe[col] = dframe[col]-results.seasonal

                if doplot:
                    results.plot()
        
        return dframe
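A minimal usage sketch of the `stl` branch above on synthetic data (the monthly series, its name, and its values are assumptions, not part of the original project); it performs the same per-column adjustment that `deseason` applies with `method='stl'`:

import numpy as np
import pandas as pd
from statsmodels.tsa.seasonal import STL

# Hypothetical monthly series with a yearly cycle plus noise.
idx = pd.date_range("2015-01-31", periods=72, freq="M")
y = pd.Series(100 + 10 * np.sin(2 * np.pi * np.arange(72) / 12)
              + np.random.randn(72), index=idx, name="load")

# Same seasonal adjustment the loop performs per column with method='stl'.
res = STL(y, robust=False, seasonal=3).fit()
deseasonalized = y - res.seasonal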
Example #6
def v_seasonality(datos):
    """
    Visualization of the seasonality test by means of plots of the
    already-stationary (differenced) data.

    Parameters
    ----------
    datos : pd.DataFrame : data contained in the file that was read

    Returns
    -------
    Four charts in a single figure reflecting the seasonality test of the data

    """
    datos = fn.f_leer_archivo(
        param_archivo='archivos/FedInterestRateDecision-UnitedStates.xlsx',
        sheet_name=0)
    datos = datos.set_index('datetime')
    datos_dif = datos - datos.shift()
    datos_dif.dropna(inplace=True)
    serie = datos_dif['actual']
    serie = serie.resample('M').mean().ffill()
    result = STL(serie).fit()
    charts = result.plot()
    plt.show()
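The same recipe (difference the series, resample to month-end means, then inspect the STL decomposition) applies to any numeric series; a short sketch with a synthetic series (the data are an assumption, not part of the original project):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL

# Hypothetical daily series: a random walk plus a yearly cycle.
idx = pd.date_range("2016-01-01", periods=1461, freq="D")
serie = pd.Series(np.cumsum(np.random.randn(1461))
                  + 10 * np.sin(2 * np.pi * np.arange(1461) / 365.25),
                  index=idx)

serie_dif = serie.diff().dropna()                 # first difference, as above
serie_m = serie_dif.resample('M').mean().ffill()  # month-end means
STL(serie_m).fit().plot()                         # period=12 inferred from the index
plt.show()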
Example #7
    def decompose_ts(self, df, label, freq='W'):

        self.freq = freq

        ts = df[['TaskDate',
                 'TaskCount']].set_index('TaskDate').resample(freq).sum()

        sns.set(rc={"figure.figsize": (10, 8)})
        print(f"{freq} decomposition of {label}")

        try:

            # Decomposition 1
            result = seasonal_decompose(
                ts, model='additive'
            )  # model='additive' or model='multiplicative'
            fig = result.plot()
            fig.savefig(
                os.path.join(self.path, self.report_img, "decompose",
                             "decompose_" + label + "_" + self.freq + ".png"))
            plt.close()

        except Exception:

            # Decomposition with STL
            result = STL(ts).fit()
            fig = result.plot()
            fig.savefig(
                os.path.join(self.path, self.report_img, "decompose",
                             "decompose_" + label + "_" + self.freq + ".png"))
            plt.close()
Example #8
def analysis(file_name="",
             ds_col="Date",
             target_col="Price",
             points=365,
             stl_period=5,
             invert=True,
             normalize=False,
             norm_log=False):
    fn = file_name if file_name else input("CSV File: ")
    df = pd.read_csv(fn)
    if invert:
        df = df.iloc[::-1]
    df[ds_col] = pd.to_datetime(df[ds_col])
    if normalize:
        max_value = max(df[target_col])
        normalized_series = [x / max_value for x in df[target_col]]
        normalized_series = np.array(normalized_series, dtype=np.float32)
        df[target_col] = normalized_series
    elif norm_log:
        df[target_col] = np.log(df[target_col])
    # STL
    df2 = df[[target_col]]
    df2.index = df[ds_col]
    stl = STL(df2, period=stl_period).fit()
    # Prophet
    df = df.reset_index()
    df = df[[ds_col, target_col]]
    df = df.rename(columns={ds_col: "ds", target_col: "y"})
    m = Prophet()
    m.fit(df)
    future = m.make_future_dataframe(periods=points)
    forecast = m.predict(future)
    # Plotting Charts
    name = fn.split("/")[-1].replace(".csv", "")
    stl.plot()
    plt.savefig("{}_STL.png".format(name))
    fig1 = m.plot(forecast)
    fig1.savefig("{}_forecast.png".format(name))
    fig2 = m.plot_components(forecast)
    fig2.savefig("{}_forecast_components.png".format(name))
Example #9
data['Datetime'] = pd.to_datetime(data["Datetime"])
data = data.set_index('Datetime')
# data.drop(['Datetime'], axis=1, inplace=True)
# data.head()

rcParams['figure.figsize'] = 30, 10
decomposition = sm.tsa.seasonal_decompose(data['Global_active_power'],
                                          model='additive')
fig = decomposition.plot()
dir(decomposition)

decomposition.observed
decomposition.trend.tail()
decomposition.seasonal.tail()
decomposition.resid.tail()

cycle, trend = sm.tsa.filters.hpfilter(series, 50)

from statsmodels.tsa.seasonal import STL

result = STL(series).fit()
chart = result.plot()
plt.show()

import datetime
# Then you'll have, using datetime.timedelta:

date_1 = datetime.datetime.strptime(start_date, "%m/%d/%y")

end_date = date_1 + datetime.timedelta(days=10)
Example #10
# plot autocorrelation function to decide lagging
# fig = plt.figure(figsize=(11, 10))
# ax = fig.add_subplot(211)
# plot_acf(yi, lags=50, ax=ax)
# ax2 = fig.add_subplot(212)
# plot_pacf(yi, lags=50, method='ols',ax=ax2)
# plt.show()

## transform data: boxcox, deseasonalize, detrend
# boxcox to achieve stationarity in variance
y_trans, lam = boxcox(yi.values.flatten())

y_trans = pd.Series(y_trans, index=yi.index)

results = STL(y_trans).fit()
results.plot()
plt.show()
# deseasonal, detrend:
y_dd = results.resid

### Predict using Holt-Winters' Exponential Smoothing (HWES), for a time series with trend and seasonal components
# a manual split

n_test = int(0.2 * len(y_trans))

train, test = y_trans[:-n_test], y_dd[-n_test:]

model = ExponentialSmoothing(train,
                             trend='add',
                             seasonal='add',
                             seasonal_periods=30,
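A self-contained sketch of the HWES fit-and-forecast step outlined above, keeping the additive trend, additive seasonality, and 30-step season from the call; the synthetic series and the forecast step are assumptions, not part of the original snippet:

import numpy as np
import pandas as pd
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Hypothetical series with a mild trend and a 30-step seasonal cycle.
idx = pd.date_range("2020-01-01", periods=300, freq="D")
y = pd.Series(50 + 0.05 * np.arange(300)
              + 5 * np.sin(2 * np.pi * np.arange(300) / 30)
              + np.random.randn(300), index=idx)

# Manual 80/20 split, mirroring the snippet.
n_test = int(0.2 * len(y))
train, test = y[:-n_test], y[-n_test:]

model = ExponentialSmoothing(train, trend='add', seasonal='add',
                             seasonal_periods=30).fit()
pred = model.forecast(len(test))   # forecast over the held-out window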
Example #11
testing_set_df["date"] = pd.to_datetime(testing_set_df["date"])
df = training_set_df.loc[training_set_df["id"] == "FOODS_2_360_WI_2_validation"]
df2 = testing_set_df.loc[testing_set_df["id"] == "FOODS_2_360_WI_2_validation"]
df.set_index("date", inplace = True)
df2.set_index("date", inplace = True)

cycle, trend = sm.tsa.filters.hpfilter(df["demand"], lamb = 6.25) # Annual lambda
gdp_decomp = df[["demand"]]
gdp_decomp["cycle"] = cycle
gdp_decomp["trend"] = trend

gdp_decomp.plot.line()
plt.show()

result = STL(df["demand"]).fit()
result.plot()
plt.show()

sell_prices = pd.read_csv(SELL_PRICES_PATH_str)
stv = pd.read_csv(SALES_TRAIN_PATH_str)
stv.drop(["item_id", "dept_id", "cat_id", "store_id", "state_id"], axis = 1, inplace = True)
stv.set_index("id", inplace = True)
cor_mat = np.corrcoef(stv)
cor_mat = pd.DataFrame(cor_mat, index = stv.index, columns = stv.index.tolist())
abs_cor_mat = np.abs(cor_mat)

count = 0

for i in range(abs_cor_mat.shape[0]):
    for j in range(i + 1, abs_cor_mat.shape[0]):
        if abs_cor_mat.iloc[i, j] > 0.8:
Example #12
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters

from statsmodels.datasets import co2
from statsmodels.tsa.seasonal import STL

register_matplotlib_converters()
data = co2.load_pandas().data
data = data.resample('M').mean().ffill()

res = STL(data).fit()
res.plot()
plt.show()
Example #13
from statsmodels.tsa.seasonal import STL
from sklearn import preprocessing
scaler = preprocessing.MinMaxScaler()

def add_stl_plot(fig, res, legend):
    """Add plots from additional STL fits"""
    axs = fig.get_axes()
    comps = ['observed', 'trend', 'seasonal', 'resid']
    for ax, comp in zip(axs[0:], comps):
        for r in res:
            series = getattr(r, comp)
            if comp == 'resid':
                ax.plot(series, marker='o', linestyle='none')
            else:
                ax.plot(series)
                if comp == 'observed':
                    ax.legend(legend, frameon=False)
                    

df1_units_sum = df1.groupby('period')['units'].sum()
df2_units_sum = df2.groupby('period')['units'].sum()

plt.figure(figsize=(12,12))
res1_units = STL(df1_units_sum, robust=True).fit()
fig = res1_units.plot()
res2_units = STL(df2_units_sum, robust=True).fit()
add_stl_plot(fig, [res2_units], ['product 1', 'product 2'])
fig.set_size_inches(12,12)