Beispiel #1
0
def generate_forecast(df_, regressor, forecast_horizon, window_length):
    """Forecast the last column of ``df_`` and append the forecast as new rows.

    Args:
        df_: DataFrame whose last column is the series to forecast. It is
            copied, so the caller's frame is never modified.
        regressor: Key handed to ``select_regressor`` to obtain the regressor.
        forecast_horizon: Number of future steps to predict.
        window_length: Sliding-window length of the reduced-regression
            forecaster.

    Returns:
        The copied frame followed by ``forecast_horizon`` extra rows holding
        the prediction in a ``' Forecast'`` column, re-indexed with a
        ``freq='M'`` DatetimeIndex.
    """
    df = df_.copy()
    # NOTE: missing values are not filled here; the series is passed on as-is.

    # Resetting the index of the time series,
    # because sktime doesn't support DatetimeIndex for now
    y_train = df.iloc[:, -1].reset_index(drop=True)

    fh = np.arange(forecast_horizon) + 1
    regressor = select_regressor(regressor)
    forecaster = ReducedRegressionForecaster(regressor=regressor,
                                             window_length=window_length,
                                             strategy='recursive')
    forecaster.fit(y_train, fh=fh)
    y_pred = forecaster.predict(fh)

    # NOTE(review): the start date is hard-coded rather than taken from the
    # input index -- confirm this matches the data being passed in.
    date = '1/1/2016'
    periods = df.shape[0] + forecast_horizon
    # New DatetimeIndex that reaches as far into the future as the horizon.
    date_index = pd.date_range(date, periods=periods, freq='M')

    col_name = ' Forecast'
    df_pred = pd.DataFrame({col_name: y_pred})
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    df = pd.concat([df, df_pred], ignore_index=True)
    df.set_index(date_index, inplace=True)

    return df
Beispiel #2
0
def calculate_smape(df_, regressor, forecast_horizon, window_length):
    """Return the sMAPE of a recursive reduced-regression forecast.

    The last column of ``df_`` is forward-filled, the final 12 observations
    are held out, and the forecaster is scored on that hold-out.

    Args:
        df_: DataFrame whose last column is the series to evaluate. It is
            copied, so the caller's frame is never modified.
        regressor: Key handed to ``select_regressor`` to obtain the regressor.
        forecast_horizon: Unused; kept so the signature stays compatible
            with existing callers.
        window_length: Sliding-window length of the forecaster.

    Returns:
        The value returned by ``smape_loss`` for the hold-out period.
    """
    df = df_.copy()
    # ffill() replaces fillna(method='ffill'), which recent pandas deprecates.
    df.ffill(inplace=True)
    y = df.iloc[:, -1].reset_index(drop=True)
    y_train, y_test = temporal_train_test_split(y, test_size=12)
    fh = np.arange(y_test.shape[0]) + 1
    regressor = select_regressor(regressor)
    forecaster = ReducedRegressionForecaster(regressor=regressor,
                                             window_length=window_length,
                                             strategy='recursive')
    forecaster.fit(y_train, fh=fh)
    y_pred = forecaster.predict(fh)

    # Actuals first, predictions second; sMAPE is symmetric, so the value is
    # the same as with the arguments swapped.
    return smape_loss(y_test, y_pred)
Beispiel #3
0
def rf_forecast(y_train: pd.Series, forecast_horizon: np.array) -> pd.Series:
    """
    Train a recursive random-forest forecaster and predict the given horizon.
    Args:
        y_train: Historic series the model is fitted on.
        forecast_horizon: Steps ahead to predict, e.g. [1, ... , n].

    Returns: The predicted series, renamed to "consumption".

    """
    model = ReducedRegressionForecaster(
        regressor=RandomForestRegressor(n_estimators=100),
        window_length=15,
        strategy="recursive",
    )
    model.fit(y_train)
    predicted = model.predict(forecast_horizon)
    return predicted.rename("consumption")
Beispiel #4
0
 def calculate_forecast(df_, regressor, forecast_horizon, window_length):
     """Forecast every column of ``df_`` with the selected model.

     'Naive', 'Theta', 'Exp_Smoothing' and 'TBATS' are used directly as
     forecasters; any other key is wrapped in a recursive
     ReducedRegressionForecaster with the given ``window_length``.

     Returns a DataFrame with one forecast column per input column.
     """
     frame = df_.copy()
     is_standalone = regressor in ('Naive', 'Theta', 'Exp_Smoothing', 'TBATS')
     model = select_regressor(regressor)
     if is_standalone:
         forecaster = model
     else:
         forecaster = ReducedRegressionForecaster(regressor=model,
                                                  window_length=window_length,
                                                  strategy='recursive')

     # The horizon is the same for every column: steps 1..forecast_horizon.
     horizon = np.arange(forecast_horizon) + 1
     predictions = []
     for name in frame.columns:
         position = frame.columns.get_loc(name)
         series = frame.iloc[:, position].reset_index(drop=True)
         forecaster.fit(series, fh=horizon)
         predictions.append(forecaster.predict(horizon))

     combined = pd.concat(predictions, axis=1)
     combined.columns = frame.columns.tolist()
     return combined
Beispiel #5
0
    def calculate_smape(df_, regressor, forecast_horizon, window_length):
        """Score the selected model on the last 12 rows of every column.

        Each column is split into train and a 12-row hold-out, forecast over
        the hold-out, and scored with sMAPE (in percent, one decimal). The
        'total' column's forecast and actuals are plotted and sent to
        Streamlit.

        Returns a single-column DataFrame of scores indexed by column name.
        """
        frame = df_.copy()
        is_standalone = regressor in ('Naive', 'Theta', 'Exp_Smoothing', 'TBATS')
        model = select_regressor(regressor)
        if is_standalone:
            forecaster = model
        else:
            forecaster = ReducedRegressionForecaster(regressor=model,
                                                     window_length=window_length,
                                                     strategy='recursive')

        predicted_parts = []
        actual_parts = []
        for name in frame.columns:
            position = frame.columns.get_loc(name)
            series = frame.iloc[:, position].reset_index(drop=True)
            train_part, test_part = temporal_train_test_split(series, test_size=12)
            horizon = np.arange(test_part.shape[0]) + 1
            forecaster.fit(train_part, fh=horizon)
            predicted_parts.append(forecaster.predict(horizon))
            actual_parts.append(test_part)

        predicted = pd.concat(predicted_parts, axis=1)
        actual = pd.concat(actual_parts, axis=1)
        predicted.columns = actual.columns.tolist()

        fig, ax = plt.subplots(1, 1, figsize=(15, 6), facecolor='w', edgecolor='k')
        fig.subplots_adjust(hspace=.5, wspace=.001)

        # One sMAPE percentage per column, in column order.
        scores = [
            round(100 * smape_loss(predicted[column], actual[column]), 1)
            for column in actual
        ]

        ax.plot(predicted['total'], 'o-', color='orange', label="predicted")
        ax.plot(actual['total'], 'o-', color='blue', label="actual")
        ax.set_title('Testing the performance: last 12 month actual vs forecast')
        ax.legend()
        st.pyplot(fig)
        return pd.DataFrame(scores).set_index(actual.columns)
Beispiel #6
0
def wpmodel(wpg):
    """Build a per-channel 'EC' series and extend it with a 5-step forecast.

    Args:
        wpg: DataFrame that provides at least the columns ``Channel``,
            ``Year``, ``Month``, ``Ton`` and ``Baht``.

    Returns:
        dict mapping each channel with more than 20 rows to a DataFrame with
        columns ``Year``, ``Month``, ``EC`` and ``Type``: the 'Actual' rows
        followed by the 'Forecast' rows.
    """
    model = RandomForestRegressor(random_state=0)
    ry = range(2015, 2021)
    rm = range(1, 13)
    # Calendar scaffold: one (Year, Month) row for every month of 2015-2020.
    dt = pd.concat([pd.DataFrame({'Month': rm}, index=[y] * len(rm))
                   .reset_index().rename(columns={'index': 'Year'}) for y in ry]) \
        .reset_index(drop=True)
    # Months from 2020-08 onward are excluded from the actuals; the result
    # does not depend on the channel, so it is computed once.
    observed = dt.drop(dt.loc[((dt.Year == 2020) & (dt.Month >= 8))].index)

    csd = {}
    for c in wpg.Channel.unique():
        dtcs = observed.merge(wpg.loc[wpg.Channel == c], how='left',
                              left_on=['Year', 'Month'], right_on=['Year', 'Month'])
        # ffill() replaces fillna(method='ffill'), deprecated in recent pandas.
        cs = dtcs.ffill().dropna()
        cspct = cs[['Ton', 'Baht']].pct_change().rename(columns={'Ton': 'TonPct', 'Baht': 'BahtPct'}) * 100
        csdiff = cs[['Ton', 'Baht']].diff().rename(columns={'Ton': 'TonDiff', 'Baht': 'BahtDiff'})
        cs = cs.merge(cspct, how='left', left_index=True, right_index=True)
        cs = cs.merge(csdiff, how='left', left_index=True, right_index=True)
        # Ratio of percentage changes and ratio of absolute changes
        # (presumably elasticity and slope of Ton w.r.t. Baht -- confirm).
        cs['E'] = cs.TonPct / cs.BahtPct
        cs['S'] = cs.TonDiff / cs.BahtDiff
        cs['EC'] = cs.E.clip(-1000, 1000).ffill()
        # Clipped S; the original derived this from E by copy-paste. SC is
        # not part of the returned columns, so the output is unaffected.
        cs['SC'] = cs.S.clip(-1000, 1000).ffill()
        cs['Type'] = 'Actual'
        csd[c] = cs[['Year', 'Month', 'EC', 'Type']]

    # Keep only channels with more than 20 rows.
    csdp = {c: frame for c, frame in csd.items() if len(frame.EC) > 20}

    num_step = 5
    fh = np.arange(1, num_step + 1)  # forecasting horizon
    for c in csdp.keys():
        print(c)
        y = csdp[c].EC.dropna()
        f = ReducedRegressionForecaster(model, window_length=12)  # monthly seasonal periodicity
        f.fit(y)
        y_pred = f.predict(fh)
        y_pred_df = pd.DataFrame(y_pred)
        y_pred_df.rename(columns={0: 'EC'}, inplace=True)
        y_pred_df['Type'] = 'Forecast'
        # Attach Year/Month from the calendar rows that share the forecast's
        # index labels.
        y_pred_df = dt.merge(y_pred_df, how='inner', left_index=True, right_index=True)
        # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
        csdp[c] = pd.concat([csdp[c], y_pred_df])
    return csdp
def main(opt, verbose=0):
    """Decompose a series with EMD, forecast each component, and recombine.

    The series returned by ``load_data`` is decomposed with ``EMD``; every
    resulting component is forecast by a grid-searched
    ``ReducedRegressionForecaster``; the component forecasts are summed per
    step, written to an Excel file under ``output/`` and scored with RMSE.

    Args:
        opt: Options object. The attributes read here are ``station``,
            ``test_size``, ``steps``, ``window_length``, ``strategy`` and
            ``n_jobs``.
        verbose: Forwarded to ``ParallelForecastingGridSearchCV``.
    """
    wind_speed = load_data(station=opt.station)

    # Hold out the last `opt.test_size` observations for testing.
    y_train, y_test = wind_speed.iloc[:-opt.test_size], wind_speed.iloc[
        -opt.test_size:]
    plot_ys(y_train, y_test, labels=("y_train", "y_test"))

    # ================================== Model ==================================

    # NOTE(review): the decomposition runs on the full series, test span
    # included -- confirm this is intended.
    emd = EMD()
    imfs = emd(wind_speed.values).T

    # One column per component; the last column is labelled "residue".
    num_imfs = imfs.shape[1]
    imfs = pd.DataFrame(imfs,
                        index=pd.RangeIndex(start=0, stop=len(imfs), step=1),
                        columns=["imf%d" % i
                                 for i in range(num_imfs - 1)] + ["residue"])

    y_trains_, y_tests_ = imfs.iloc[:-opt.test_size], imfs.iloc[-opt.
                                                                test_size:]

    # NaN-filled result frame over the test rows with (component, step)
    # columns. Components are keyed "imf0".."imf{num_imfs-1}" here, so the
    # last component is stored as "imf{num_imfs-1}", not "residue".
    index = imfs.index[-opt.test_size:]
    columns = pd.MultiIndex.from_product(
        [["imf%d" % i for i in range(num_imfs)],
         ["step%d" % i for i in opt.steps]])
    y_preds = pd.DataFrame(np.full((len(index), len(columns)), np.nan),
                           index=index,
                           columns=columns)

    for i in range(num_imfs):
        print("imf%d:" % i if i != num_imfs - 1 else "residue:")
        y_train_, y_test_ = y_trains_.iloc[:, i], y_tests_.iloc[:, i]

        # The first component gets a feature-selection + SVR pipeline; all
        # other components use LassoLarsCV.
        if i in [0]:
            param_grid = {
                "regressor__clf__C": [1, 5, 10, 25, 50, 100, 150],
                "regressor__clf__gamma": ['scale', 0.001, 0.01, 0.1, 1.0],
                'regressor__fs__percentile': range(10, 100, 10),
            }
            regressor = Pipeline([("fs",
                                   SelectPercentile(percentile=50,
                                                    score_func=f_regression)),
                                  ("clf", SVR(C=5, gamma="scale"))])
        else:
            param_grid = {"regressor__normalize": [True, False]}
            regressor = LassoLarsCV()
        forecaster = ReducedRegressionForecaster(
            regressor=regressor,
            window_length=opt.window_length,
            strategy=opt.strategy)
        # Grid search scored by RMSE; the splitter's initial window is 70 %
        # of this component's training data.
        grid_search = ParallelForecastingGridSearchCV(
            forecaster,
            cv=SlidingWindowSplitter(initial_window=int(len(y_train_) * 0.7)),
            param_grid=param_grid,
            scoring=make_forecasting_scorer(root_mean_squared_error,
                                            name="rmse"),
            n_jobs=opt.n_jobs,
            verbose=verbose)
        y_preds_ = multistep_forecasting(grid_search,
                                         y_train_,
                                         y_test_,
                                         steps=opt.steps)
        # Per-step RMSE of this component against its own test values.
        print([
            root_mean_squared_error(y_test_, y_preds_["step%d" % step])
            for step in opt.steps
        ])
        y_preds["imf%d" % i] = y_preds_

    # Put the step on the top column level, then sum the components of each
    # step; skipna=False keeps a row NaN if any component is missing.
    y_preds = y_preds.swaplevel(1, 0, axis=1)
    y_preds = pd.concat([
        y_preds["step%d" % step].sum(axis=1, skipna=False)
        for step in opt.steps
    ],
                        axis=1)
    y_preds.columns = ["step%d" % i for i in opt.steps]
    # File name: "<station>_<this script's base name, upper-cased>.xls".
    y_preds.to_excel(
        "output/%s_%s.xls" %
        (opt.station, os.path.split(__file__)[-1].rsplit(".")[0].upper()))

    # Per-step RMSE of the recombined forecast against the original test data.
    print([
        root_mean_squared_error(y_test, y_preds["step%d" % step])
        for step in opt.steps
    ])
Beispiel #8
0
# In[51]:

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# In[52]:

# Two search spaces: the forecaster's window length and the random forest's
# number of trees.
forecaster_param_grid = {"window_length": [5, 10, 15]}
regressor_param_grid = {"n_estimators": [100, 200, 300]}

# In[53]:

# The regressor handed to the forecaster is itself a GridSearchCV over
# `regressor_param_grid`.
regressor = GridSearchCV(RandomForestRegressor(),
                         param_grid=regressor_param_grid)
forecaster = ReducedRegressionForecaster(regressor=regressor, window_length=10)

# In[55]:

# Sliding-window splitter whose initial window is half the length of `train`;
# the outer search covers `forecaster_param_grid`.
cv = SlidingWindowSplitter(initial_window=int(len(train) * 0.5))
gscv = ForecastingGridSearchCV(forecaster,
                               cv=cv,
                               param_grid=forecaster_param_grid)

# In[56]:

# NOTE(review): `train` and `fh` are not defined in this excerpt; presumably
# they come from earlier notebook cells.
gscv.fit(train)
y_pred = gscv.predict(fh)

# In[57]:
Beispiel #9
0
            y_pred.index
        ]  # select only time points which we predicted
        scores[i] = scoring(y_test_subset, y_pred)
    return scores


@pytest.mark.parametrize(
    "forecaster, param_dict",
    [
        (NaiveForecaster(strategy="mean"), {"window_length": TEST_WINDOW_LENGTHS}),
        # atomic estimator
        (
            TransformedTargetForecaster(
                [  # composite estimator
                    ("t", Detrender(PolynomialTrendForecaster())),
                    ("f", ReducedRegressionForecaster(LinearRegression())),
                ]
            ),
            {
                "f__window_length": TEST_WINDOW_LENGTHS,
                "f__step_length": TEST_STEP_LENGTHS,
            },
        ),  # multiple params
    ],
)
@pytest.mark.parametrize(
    "scoring",
    [sMAPE(), make_forecasting_scorer(mean_squared_error, greater_is_better=False)],
)
@pytest.mark.parametrize(
    "cv",
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """
    Build a forecasting model for one customer and time interval, save the
    fitted model, print its sMAPE and plot train, test and predicted values.

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp

    customer_id: int

    start: string
        start year and month in '2020-01' format

    end: string
        end year and month in '2020-01' format *** this month will not be included ***

    model_type:
        type of model to use in forecasting
        select from : ['NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster', 'KNeighborsRegressor',
                       'ExponentialSmoothing', 'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']

    test_size_month:
        number of month that will be excluded from end of interval to use as test dataset

    model_storage_path: string
        the folder that you want to store saved models; an empty string
        stores the model in the current working directory

    Returns
    -------
    plot:
        the value returned by ``plot_series`` for the train, test and
        predicted series. The sMAPE loss is printed, not returned.

    Raises
    ------
    ValueError
        if ``model_type`` is not one of the supported names
    """
    import os

    y_train, y_test = temporal_train_test_split(prepare_data(data,
                                                             customer_id,
                                                             start=start,
                                                             end=end),
                                                test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    if model_type == 'NaiveForecaster':
        forecaster = NaiveForecaster(strategy="last", sp=12)
    elif model_type == 'PolynomialTrendForecaster':
        forecaster = PolynomialTrendForecaster(degree=2)
    elif model_type == 'ThetaForecaster':
        forecaster = ThetaForecaster(sp=6)
    elif model_type == 'KNeighborsRegressor':
        regressor = KNeighborsRegressor(n_neighbors=1)
        forecaster = ReducedRegressionForecaster(regressor=regressor,
                                                 window_length=12,
                                                 strategy="recursive")
    elif model_type == 'ExponentialSmoothing':
        forecaster = ExponentialSmoothing(trend="add",
                                          seasonal="multiplicative",
                                          sp=12)
    elif model_type == 'AutoETS':
        forecaster = AutoETS(auto=True, sp=12, n_jobs=-1)
    elif model_type == 'AutoARIMA':
        forecaster = AutoARIMA(sp=12, suppress_warnings=True)
    elif model_type == 'TBATS':
        forecaster = TBATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'BATS':
        forecaster = BATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'EnsembleForecaster':
        forecaster = EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ])
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(f'Unknown model_type: {model_type!r}')

    try:
        forecaster.fit(y_train)
    except Exception:
        # Best-effort retry kept from the original: refit on the series
        # shifted by one (presumably for multiplicative models that reject
        # non-positive values). Narrowed from a bare `except:` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        # NOTE(review): predictions from the retried model are on the shifted
        # scale and are not shifted back.
        forecaster.fit(y_train + 1)

    y_pred = forecaster.predict(fh)
    # os.path.join keeps an empty storage path relative; plain string
    # formatting turned it into a path at the filesystem root.
    model_file = f'{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model'
    dump(forecaster, os.path.join(model_storage_path, model_file))

    # sMAPE is symmetric, so the argument order does not change the value.
    print('sMAPE Loss :', smape_loss(y_test, y_pred))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
Beispiel #11
0
def _fit_score_project(forecaster, data, y_train, y_test, fh, futurewin):
    """Score ``forecaster`` on the hold-out, then refit on all of ``data``.

    Returns ``(y_pred, accuracy, future_min, future_max)``: the hold-out
    prediction, its sMAPE rounded to 4 decimals, and the rounded minimum and
    maximum of the prediction over ``futurewin`` after refitting on ``data``.
    """
    from sktime.performance_metrics.forecasting import smape_loss

    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)
    accuracy = round(smape_loss(y_test, y_pred), 4)
    # full model dev and forecast of the future window
    forecaster.fit(data)
    fut_pred = forecaster.predict(futurewin)
    return y_pred, accuracy, round(min(fut_pred), 2), round(max(fut_pred), 2)


def genforecast(data):
    """Compare four forecasters on ``data`` and project 5 steps ahead.

    ``data`` is split with ``temporal_train_test_split``. A drift naive
    forecaster, a linear trend forecaster, an exponential-smoothing ensemble
    and a grid-searched KNN reduced-regression forecaster are each scored on
    the test part with sMAPE, then refitted on all of ``data`` to predict the
    next 5 steps.

    Returns:
        An 18-tuple: the (min, max) of the 5-step projection for naive, poly,
        ensemble and reduced regression; then ``y_test`` and its length; then
        (test prediction, sMAPE) for the same four models in the same order.
    """
    import math

    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor
    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.compose import ReducedRegressionForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import temporal_train_test_split
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.trend import PolynomialTrendForecaster

    y_train, y_test = temporal_train_test_split(data)
    fh = np.arange(1, len(y_test) + 1)
    testct = len(y_test)
    futurewin = np.arange(1, 6)  # 5 day in future prediction

    def evaluate(forecaster):
        return _fit_score_project(forecaster, data, y_train, y_test, fh,
                                  futurewin)

    y_pred_naive, naive_acc, min_naive, max_naive = evaluate(
        NaiveForecaster(strategy="drift"))

    y_pred_poly, poly_acc, min_poly, max_poly = evaluate(
        PolynomialTrendForecaster(degree=1))

    # Seasonal period: a quarter of the test length, kept within [2, 12].
    spval = max(2, min(math.floor(len(y_test) / 4), 12))
    # NOTE(review): newer sktime releases name this argument `damped_trend`
    # -- confirm against the installed version.
    ensemble = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt",
         ExponentialSmoothing(trend="add",
                              damped=False,
                              seasonal="multiplicative",
                              sp=spval)),
        ("damped",
         ExponentialSmoothing(trend="add",
                              damped=True,
                              seasonal="multiplicative",
                              sp=spval))
    ])
    y_pred_ensem, ensem_acc, min_ensem, max_ensem = evaluate(ensemble)

    regressor = KNeighborsRegressor(n_neighbors=1)
    forecaster = ReducedRegressionForecaster(regressor=regressor,
                                             window_length=15,
                                             strategy="recursive")
    param_grid = {"window_length": [5, 10, 15]}
    # we fit the forecaster on the initial window, and then use temporal cross-validation to find the optimal parameter
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=param_grid)
    y_pred_redreg, redreg_acc, min_redreg, max_redreg = evaluate(gscv)

    return min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem, min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc, y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg, redreg_acc
Beispiel #12
0
    * 模块化并与scikit-learn兼容,因此我们可以轻松地应用任何scikit-learn回归器来解决我们的预测问题;
    * 可调整的,允许我们调整超参数,例如窗口长度或生成预测的策略
    * 自适应的,从某种意义上讲,它可以使scikit-learn的估算器界面适应预测者的界面,并确保我们可以调整和正确评估模型
''')

# Load the series and hold out its last 36 observations as the test set.
y = load_airline()
y_train, y_test = temporal_train_test_split(y, test_size=36)
st.write("y_train.shape[0],y_test.shape[0]:", y_train.shape[0],
         y_test.shape[0])

from sktime.forecasting.compose import ReducedRegressionForecaster
from sklearn.neighbors import KNeighborsRegressor

# 1-nearest-neighbour regressor wrapped as a recursive forecaster over a
# 12-observation window.
regressor = KNeighborsRegressor(n_neighbors=1)
forecaster = ReducedRegressionForecaster(regressor=regressor,
                                         window_length=12,
                                         strategy="recursive")
forecaster.fit(y_train)
# NOTE(review): `fh` is not defined in this excerpt; presumably it is set
# earlier in the script.
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
# NOTE(review): called without a figure; presumably relies on the figure
# plot_ys just drew -- confirm.
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))

st.write('''
    为了更好地理解先前的数据转换,我们可以看看如何将训练数据划分为多个窗口。 
    本质上,sktime使用时间时间序列分割器,类似于scikit-learn中的交叉验证分割器。 
    在这里,我们展示了这对于训练数据的前20个观察结果是如何工作的:
''')
with st.echo():
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    cv = SlidingWindowSplitter(window_length=10, start_with_window=True)
Beispiel #13
0
 # Forecasters registered here are used directly (not wrapped) in the loop below.
 models.append(('Theta', ThetaForecaster(sp=12)))
 models.append(('Exp_Smoothing', ExponentialSmoothing(trend="add", seasonal="additive", sp=12)))
 models.append(('TBATS', TBATS(sp=12, use_trend=True, use_box_cox=False)))
 
 # Sidebar controls for the forecast length and the sliding-window length.
 forecast_horizon = st.sidebar.slider(label = 'Forecast Length (months)',min_value = 3, max_value = 36, value = 12)
 window_length = st.sidebar.slider(label = 'Sliding Window Length ',min_value = 1, value = 12)
 # evaluate each model in turn
 results1 = []
 names = []
 dn_forecast = []
 dn_test =[]
 
 
 for name, model in models:
     # Models with these names are wrapped in a recursive
     # ReducedRegressionForecaster; every other model is used as-is.
     if name == 'LR' or name == 'KNN' or name == 'RF' or name == 'GB' or name == 'XGBoost' or name == 'SVM' or name == 'Extra Trees':
         forecaster = ReducedRegressionForecaster(regressor=model, window_length=window_length,strategy='recursive')
     else:
         forecaster = model
     # The 'total' column with its last 12 observations held out; the split
     # is recomputed on every iteration.
     y = df2['total'].reset_index(drop=True)
     y_train, y_test = temporal_train_test_split(y, test_size = 12)
     fh = np.arange(y_test.shape[0]) + 1
     forecaster.fit(y_train)
     y_pred = forecaster.predict(fh)
     dn_forecast.append(y_pred)
     dn_test.append(y_test)
     # squared=False makes mean_squared_error return the root (RMSE).
     accuracy_results = mean_squared_error(y_test,y_pred,squared=False)
     results1.append(accuracy_results)
     names.append(name)
     # NOTE(review): `msg` is built but never used in this excerpt.
     msg = "%s: %.0f " % (name, accuracy_results.mean())
     #print(msg)
 #plot algorithm comparison
# Seasonal naive baseline with a seasonal period of 12.
naive_forecaster_seasonal = NaiveForecaster(strategy="seasonal_last", sp=12)
naive_forecaster_seasonal.fit(y_train)
y_seasonal_last = naive_forecaster_seasonal.predict(fh)

# NOTE(review): `y_last`, `y_train`, `y_test` and `fh` are not defined in this
# excerpt; presumably they come from earlier cells.
plot_ys(y_train, y_test, y_last, y_seasonal_last, labels=["y_train", "y_test", "y_pred_last", "y_pred_seasonal_last"]);
smape_loss(y_last, y_test)

"""sklearn regressors with forcasting"""

from sktime.forecasting.compose import ReducedRegressionForecaster
from sklearn.ensemble import RandomForestRegressor
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss

# Random forest reduced to a forecaster over a 12-observation window.
regressor = RandomForestRegressor()
forecaster = ReducedRegressionForecaster(regressor, window_length=12)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)

plot_ys(y_train, y_test, y_pred, labels=['y_train', 'y_test', 'y_pred'])
smape_loss(y_test, y_pred)

"""Forcasting with autoarima"""

from sktime.forecasting.arima import AutoARIMA
# AutoARIMA with a seasonal period of 12, fitted on the same training data.
forecaster = AutoARIMA(sp=12)
forecaster.fit(y_train)

y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]);
smape_loss(y_test, y_pred)