Beispiel #1
0
def test_gscv_fit(forecaster, param_dict, cv, scoring):
    """Compare ForecastingGridSearchCV results with reference scores.

    Fits a grid search on the series returned by ``load_airline`` and checks
    that (1) the mean test scores equal those produced by
    ``compute_expected_gscv_scores``, (2) the selected parameters are the
    grid entry with the smallest score, and (3) the best forecaster carries
    those selected parameter values.
    """
    param_grid = ParameterGrid(param_dict)
    y = load_airline()

    search = ForecastingGridSearchCV(
        forecaster, param_grid=param_dict, cv=cv, scoring=scoring
    )
    search.fit(y)

    # Scores reported by the search must match the reference computation.
    score_column = f"mean_test_{scoring.name}"
    actual_scores = search.cv_results_[score_column]
    reference_scores = compute_expected_gscv_scores(
        forecaster, cv, param_grid, y, scoring
    )
    np.testing.assert_array_equal(actual_scores, reference_scores)

    # The winning parameters are the grid entry at the minimum score.
    best_params = search.best_params_
    assert best_params == param_grid[actual_scores.argmin()]

    # Restrict the best forecaster's parameters to the tuned names and make
    # sure they equal the winning parameter set.
    fitted_params = search.best_forecaster_.get_params()
    tuned_values = {
        name: setting
        for name, setting in fitted_params.items()
        if name in best_params
    }
    assert tuned_values == best_params
Beispiel #2
0
def test_multiplex_with_grid_search():
    """Test that MultiplexForecaster works with ForecastingGridSearchCV.

    MultiplexForecaster is typically tuned through ForecastingGridSearchCV,
    so this test checks that the "selected_forecaster" chosen by the grid
    search is the same forecaster that ``_score_forecasters`` picks when the
    candidate forecasters are evaluated independently.
    """
    y = load_shampoo_sales()
    candidates = [
        ("ets", AutoETS()),
        ("naive", NaiveForecaster()),
    ]
    multiplexer = MultiplexForecaster(forecasters=candidates)
    candidate_names = [label for label, _ in candidates]
    cv = ExpandingWindowSplitter(start_with_window=True, step_length=12)

    search = ForecastingGridSearchCV(
        forecaster=multiplexer,
        cv=cv,
        param_grid={"selected_forecaster": candidate_names},
    )
    search.fit(y)

    # Read the search's choice first, then score the candidates separately.
    selected_by_search = search.best_forecaster_.selected_forecaster
    selected_independently = _score_forecasters(candidates, cv, y)
    assert selected_by_search == selected_independently
Beispiel #3
0
def test_gscv(forecaster, param_grid, cv, scoring):
    """Fit a grid search on the ``load_longley`` data and validate it.

    The fitted search, together with the expanded ``ParameterGrid``, is
    handed to ``_check_cv`` for verification.
    """
    y, X = load_longley()

    search = ForecastingGridSearchCV(
        forecaster, param_grid=param_grid, cv=cv, scoring=scoring
    )
    search.fit(y, X)

    # _check_cv receives the expanded ParameterGrid, not the raw grid spec.
    expanded_grid = ParameterGrid(param_grid)
    _check_cv(forecaster, search, cv, expanded_grid, y, X, scoring)
Beispiel #4
0
# Base forecaster whose hyper-parameters are tuned by the grid search below.
forecaster = ExponentialSmoothing()

# In[39]:

# Candidate values per hyper-parameter: two options each for 'trend' and
# 'seasonal', and a single fixed value (12) for 'sp'.
forecaster_param_grid = {
    'trend': ['add', 'mul'],
    'seasonal': ['add', 'mul'],
    'sp': [12]
}

# In[40]:

# The splitter's initial window is half the length of `train` (truncated to int).
cv = SlidingWindowSplitter(initial_window=int(len(train) * 0.5))
gscv = ForecastingGridSearchCV(forecaster,
                               cv=cv,
                               param_grid=forecaster_param_grid)
# Run the search on the training series, then forecast over the horizon `fh`
# (both `train` and `fh` are defined outside this excerpt).
gscv.fit(train)
y_pred = gscv.predict(fh)

# In[43]:

# Bare expression: a notebook displays the value; run as a plain script the
# result is evaluated and discarded.
gscv.best_params_

# In[42]:

# Plot the training data, test data and forecast together.
plot_ys(train, test, y_pred, labels=["y_train", "y_test", "y_pred"])
# Bare expression again: the sMAPE value is only shown in a notebook.
smape_loss(test, y_pred)

# ### Tune Forecaster & Reduced Regressor
Beispiel #5
0
def _forecast_range(forecaster, data, horizon):
    """Refit *forecaster* on *data*; return rounded (min, max) of its forecast."""
    forecaster.fit(data)
    future = forecaster.predict(horizon)
    return round(min(future), 2), round(max(future), 2)


def genforecast(data):
    """Evaluate four forecasting approaches on *data* and forecast 5 steps ahead.

    For each approach (naive drift, linear trend, exponential-smoothing
    ensemble, grid-searched reduced regression) the model is:

    1. fitted on the training part of a temporal train/test split and scored
       on the test part with sMAPE (rounded to 4 decimals);
    2. refitted on the full series to forecast the next 5 steps, of which the
       minimum and maximum (rounded to 2 decimals) are reported.

    Parameters
    ----------
    data
        Time series accepted by ``temporal_train_test_split`` and by the
        forecasters' ``fit`` (presumably a pandas Series; confirm against
        the caller).

    Returns
    -------
    tuple
        ``(min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem,
        min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc,
        y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg,
        redreg_acc)`` where ``testct`` is ``len(y_test)``.
    """
    # Imports stay local to the function, as in the original code.
    import numpy as np
    from sklearn.neighbors import KNeighborsRegressor
    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.compose import ReducedRegressionForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import temporal_train_test_split
    from sktime.forecasting.naive import NaiveForecaster
    from sktime.forecasting.trend import PolynomialTrendForecaster
    from sktime.performance_metrics.forecasting import smape_loss

    y_train, y_test = temporal_train_test_split(data)
    testct = len(y_test)
    fh = np.arange(1, testct + 1)  # one forecast step per test observation
    futurewin = np.arange(1, 6)  # 5 steps (days) into the future

    # --- Naive forecaster with drift ---
    forecaster = NaiveForecaster(strategy="drift")
    forecaster.fit(y_train)
    y_pred_naive = forecaster.predict(fh)
    naive_acc = round(smape_loss(y_test, y_pred_naive), 4)
    min_naive, max_naive = _forecast_range(forecaster, data, futurewin)

    # --- Linear (degree-1 polynomial) trend ---
    forecaster = PolynomialTrendForecaster(degree=1)
    forecaster.fit(y_train)
    y_pred_poly = forecaster.predict(fh)
    poly_acc = round(smape_loss(y_test, y_pred_poly), 4)
    min_poly, max_poly = _forecast_range(forecaster, data, futurewin)

    # --- Ensemble of exponential-smoothing variants ---
    # Seasonal period: a quarter of the test length, clamped to [2, 12].
    spval = max(2, min(testct // 4, 12))
    forecaster = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt",
         ExponentialSmoothing(trend="add",
                              damped=False,
                              seasonal="multiplicative",
                              sp=spval)),
        ("damped",
         ExponentialSmoothing(trend="add",
                              damped=True,
                              seasonal="multiplicative",
                              sp=spval)),
    ])
    forecaster.fit(y_train)
    y_pred_ensem = forecaster.predict(fh)
    ensem_acc = round(smape_loss(y_test, y_pred_ensem), 4)
    min_ensem, max_ensem = _forecast_range(forecaster, data, futurewin)

    # --- Reduced regression (1-nearest-neighbour), window length tuned ---
    regressor = KNeighborsRegressor(n_neighbors=1)
    forecaster = ReducedRegressionForecaster(regressor=regressor,
                                             window_length=15,
                                             strategy="recursive")
    param_grid = {"window_length": [5, 10, 15]}
    # Temporal cross-validation starts from an initial window covering half
    # of the training series and is used to pick the window length.
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=param_grid)
    gscv.fit(y_train)
    y_pred_redreg = gscv.predict(fh)
    redreg_acc = round(smape_loss(y_test, y_pred_redreg), 4)
    # Refitting on the full series re-runs the whole grid search.
    min_redreg, max_redreg = _forecast_range(gscv, data, futurewin)

    return (
        min_naive, max_naive,
        min_poly, max_poly,
        min_ensem, max_ensem,
        min_redreg, max_redreg,
        y_test, testct,
        y_pred_naive, naive_acc,
        y_pred_poly, poly_acc,
        y_pred_ensem, ensem_acc,
        y_pred_redreg, redreg_acc,
    )
# Print the value held in `mape_arima_seas` (computed outside this excerpt).
print("Arima with Seas.\t", mape_arima_seas)

# Apply np.log to `series` via its `apply` method.
series_log = series.apply(np.log)

plot_series(series_log)

# Non-relative forecasting horizon built from the index of the last 25
# entries of `series`.
fh = ForecastingHorizon(series[-25:].index, is_relative=False)

# Split the slice train_[0] .. test_[0] (inclusive) with test_size=1.
# `train_` and `test_` are defined outside this excerpt.
y_train, y_test = temporal_train_test_split(series.iloc[train_[0]:test_[0] +
                                                        1],
                                            test_size=1)

# NOTE(review): both searches below are fitted on `series`; neither
# `series_log` nor `y_train` is used in this excerpt. Confirm this is intended.

# ARIMA search over a single candidate order (p, d, q); p, d and q come from
# outside this excerpt. The predict result is unpacked into two names because
# return_pred_int=True (presumably point forecasts and prediction intervals).
y_predA, y_confA = ForecastingGridSearchCV(
    ARIMA(),
    SlidingWindowSplitter(window_length=48,
                          start_with_window=True,
                          initial_window=48), {
                              'order': [(p, d, q)]
                          },
    n_jobs=-1).fit(series).predict(fh, return_pred_int=True)

# Same search with one additional single-candidate entry: the seasonal order
# (P, D, Q, S), also defined outside this excerpt.
y_predAS, y_confAS = ForecastingGridSearchCV(
    ARIMA(),
    SlidingWindowSplitter(window_length=48,
                          start_with_window=True,
                          initial_window=48), {
                              'order': [(p, d, q)],
                              'seasonal_order': [(P, D, Q, S)]
                          },
    n_jobs=-1).fit(series).predict(fh, return_pred_int=True)

y_predN = ForecastingGridSearchCV(NaiveForecaster(),
Beispiel #7
0
# Output the plot and the sMAPE of the forecast produced before this excerpt
# (`st` is presumably the streamlit module; confirm against the file's imports).
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))

# Section text introducing hyper-parameter tuning.
st.write('''
    * Tuning
    In the `ReducedRegressionForecaster`, 
    both the `window_length` and `strategy arguments` are hyper-parameters which we may want to optimise.
''')
from sktime.forecasting.model_selection import ForecastingGridSearchCV

# Recursive reduced-regression forecaster; `regressor` is defined outside this
# excerpt. window_length=15 is the value set here, and the grid below offers
# 5, 10 and 15 as candidates.
forecaster = ReducedRegressionForecaster(regressor=regressor,
                                         window_length=15,
                                         strategy="recursive")
param_grid = {"window_length": [5, 10, 15]}
# The splitter's initial window is half the length of y_train (truncated to int).
cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=param_grid)
gscv.fit(y_train)
y_pred = gscv.predict(fh)

# Show the forecast plot, its sMAPE and the parameters chosen by the search.
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))
st.write("gscv.best_params_:", gscv.best_params_)

# Section text introducing detrending (body written in Chinese). English
# rendering: "Note that so far the reduction approach above does not take any
# seasonality or trend into account, but we can easily specify a pipeline that
# first detrends the data. sktime provides a generic detrender, a transformer
# that uses any forecaster and returns the in-sample residuals of the
# forecaster's predicted values. For example, to remove the linear trend of a
# time series, we can write".
st.write('''
    * Detrending
    请注意,到目前为止,上述减少方法并未考虑任何季节或趋势,但我们可以轻松地指定首先对数据进行趋势去除的管道。
    sktime提供了一个通用的去趋势器,它是一个使用任何预测器并返回预测器预测值的样本内残差的转换器。 
    例如,要删除时间序列的线性趋势,我们可以写成
''')