Ejemplo n.º 1
0
def test_avg_mean(forecasters):
    """Assert `mean` aggfunc returns the same values as `average` with equal weights."""
    y = make_forecasting_problem()
    forecaster = EnsembleForecaster(forecasters)
    forecaster.fit(y, fh=[1, 2, 3])
    mean_pred = forecaster.predict()

    forecaster_1 = EnsembleForecaster(forecasters, aggfunc="mean", weights=[1, 1])
    forecaster_1.fit(y, fh=[1, 2, 3])
    avg_pred = forecaster_1.predict()

    pd.testing.assert_series_equal(mean_pred, avg_pred)
Ejemplo n.º 2
0
def test_aggregation_unweighted(forecasters, aggfunc):
    """Assert aggfunc returns the correct values."""
    y = make_forecasting_problem()
    forecaster = EnsembleForecaster(forecasters=forecasters, aggfunc=aggfunc)
    forecaster.fit(y, fh=[1, 2, 3])
    actual_pred = forecaster.predict()

    predictions = []
    _aggfunc = VALID_AGG_FUNCS[aggfunc]["unweighted"]
    for _, forecaster in forecasters:
        f = forecaster
        f.fit(y)
        f_pred = f.predict(fh=[1, 2, 3])
        predictions.append(f_pred)
    predictions = pd.DataFrame(predictions).T
    expected_pred = predictions.apply(func=_aggfunc, axis=1)

    pd.testing.assert_series_equal(actual_pred, expected_pred)
Ejemplo n.º 3
0
def run_sktimes(dept_id, store_id):
    # create timeseries for fbprophet
    ts = CreateTimeSeries(dept_id, store_id)

    # sktime ensembler
    forecaster = EnsembleForecaster([
        ('naive_ses', NaiveForecaster(sp=28, strategy="seasonal_last")),
        ('naive', NaiveForecaster(strategy="last")),
        ('theta_ses', ThetaForecaster(sp=28)), ('theta', ThetaForecaster()),
        ("exp_ses", ExponentialSmoothing(seasonal="additive", sp=28)),
        ("exp_damped",
         ExponentialSmoothing(trend='additive',
                              damped=True,
                              seasonal="additive",
                              sp=28))
    ])
    forecaster.fit(ts.y + 1)
    y_pred = forecaster.predict(np.arange(1, 29))
    return np.append(np.array([dept_id, store_id]), y_pred - 1)
Ejemplo n.º 4
0
def construct_M4_forecasters(sp, fh):
    kwargs = {"model": SEASONAL_MODEL, "sp": sp} if sp > 1 else {}

    theta_bc = make_pipeline(
        ConditionalDeseasonalizer(seasonality_test=seasonality_test_R,
                                  **kwargs), BoxCoxTransformer(bounds=(0, 1)),
        ThetaForecaster(deseasonalise=False))
    """
    MLP = make_pipeline(
        ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
                                  **kwargs),
        Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
        RecursiveRegressionForecaster(
            regressor=MLPRegressor(hidden_layer_sizes=6, activation="identity",
                                   solver="adam", max_iter=100,
                                   learning_rate="adaptive",
                                   learning_rate_init=0.001),
            window_length=3)
    )
    RNN = make_pipeline(
        ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
                                  **kwargs),
        Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
        RecursiveTimeSeriesRegressionForecaster(
            regressor=SimpleRNNRegressor(nb_epochs=100),
            window_length=3)
    )
    """
    forecasters = {
        "Naive":
        NaiveForecaster(strategy="last"),
        "sNaive":
        NaiveForecaster(strategy="seasonal_last", sp=sp),
        "Naive2":
        deseasonalise(NaiveForecaster(strategy="last"), **kwargs),
        "SES":
        deseasonalise(ses, **kwargs),
        "Holt":
        deseasonalise(holt, **kwargs),
        "Damped":
        deseasonalise(damped, **kwargs),
        "Theta":
        deseasonalise(ThetaForecaster(deseasonalise=False), **kwargs),
        "ARIMA":
        AutoARIMA(suppress_warnings=True, error_action="ignore", sp=sp),
        "Com":
        deseasonalise(
            EnsembleForecaster([("ses", ses), ("holt", holt),
                                ("damped", damped)]), **kwargs),
        # "MLP": MLP,
        # "RNN": RNN,
        "260":
        theta_bc,
    }
    return forecasters
Ejemplo n.º 5
0
def test_invalid_aggfuncs(forecasters, aggfunc):
    """Check if invalid aggregation functions return Error."""
    y = make_forecasting_problem()
    forecaster = EnsembleForecaster(forecasters=forecasters, aggfunc=aggfunc)
    forecaster.fit(y, fh=[1, 2])
    with pytest.raises(ValueError, match=r"not recognized"):
        forecaster.predict()
Ejemplo n.º 6
0
def test_aggregation_weighted(forecasters, aggfunc, weights):
    """Assert weighted aggfunc returns the correct values."""
    y = make_forecasting_problem()
    forecaster = EnsembleForecaster(
        forecasters=forecasters, aggfunc=aggfunc, weights=weights
    )
    forecaster.fit(y, fh=[1, 2, 3])
    actual_pred = forecaster.predict()

    predictions = []
    for _, forecaster in forecasters:
        f = forecaster
        f.fit(y)
        f_pred = f.predict(fh=[1, 2, 3])
        predictions.append(f_pred)
    predictions = pd.DataFrame(predictions).T
    _aggfunc = VALID_AGG_FUNCS[aggfunc]["weighted"]
    expected_pred = pd.Series(
        _aggfunc(predictions, axis=1, weights=np.array(weights)),
        index=predictions.index,
    )
    # expected_pred = predictions.apply(func=_aggfunc, axis=1, weights=weights)
    pd.testing.assert_series_equal(actual_pred, expected_pred)
Ejemplo n.º 7
0
def test_aggregation_weighted(forecasters, aggfunc, weights):
    """Assert weighted aggfunc returns the correct values."""
    y = make_forecasting_problem()
    forecaster = EnsembleForecaster(forecasters=forecasters,
                                    aggfunc=aggfunc,
                                    weights=weights)
    forecaster.fit(y, fh=[1, 2, 3])
    actual_pred = forecaster.predict()

    predictions = []
    for _, forecaster in forecasters:
        f = forecaster
        f.fit(y)
        f_pred = f.predict(fh=[1, 2, 3])
        predictions.append(f_pred)

    predictions = pd.DataFrame(predictions)
    if aggfunc == "mean":
        func = np.average
    else:
        func = gmean
    expected_pred = predictions.apply(func=func, axis=0, weights=weights)

    pd.testing.assert_series_equal(actual_pred, expected_pred)
Ejemplo n.º 8
0
def main():
    df = datasets.load_airline(
    )  #Univariate, monthly records from 1949 to 60 (144 records)
    y_train, y_test = temporal_train_test_split(
        df, test_size=36)  #36 months for testing

    forecaster = NaiveForecaster(
        strategy='seasonal_last', sp=12
    )  #model strategy: last, mean, seasonal_last. sp=12months (yearly season)
    forecaster.fit(y_train)  #fit
    fh = np.arange(1,
                   len(y_test) +
                   1)  #forecast horizon: array with the same lenght of y_test
    y_pred = forecaster.predict(fh)  #pred

    forecaster2 = AutoARIMA(sp=12, suppress_warnings=True, trace=1)
    forecaster2.fit(y_train)
    y_pred2 = forecaster2.predict(fh)

    forecaster3 = ExponentialSmoothing(trend='add',
                                       damped='True',
                                       seasonal='multiplicative',
                                       sp=12)
    forecaster3.fit(y_train)
    y_pred3 = forecaster3.predict(fh)

    forecaster4 = ThetaForecaster(sp=12)
    forecaster4.fit(y_train)
    y_pred4 = forecaster4.predict(fh)

    forecaster5 = EnsembleForecaster([
        ('NaiveForecaster', NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
        ('Exp Smoothing',
         ExponentialSmoothing(trend='add',
                              damped='True',
                              seasonal='multiplicative',
                              sp=12)), ('Theta', ThetaForecaster(sp=12))
    ])
    forecaster5.fit(y_train)
    y_pred5 = forecaster5.predict(fh)

    plot_ys(y_train,
            y_test,
            y_pred,
            y_pred2,
            y_pred3,
            y_pred4,
            y_pred5,
            labels=[
                'Train', 'Test', 'Naive Forecaster', 'AutoARIMA',
                'Exp Smoothing', 'Theta', 'Ensemble'
            ])
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    print('SMAPE Error for NaiveForecaster is:',
          100 * round(smape_loss(y_test, y_pred), 3), '%')
    print('SMAPE Error for AutoARIMA is:',
          100 * round(smape_loss(y_test, y_pred2), 3), '%')
    print('SMAPE Error for Exp Smoothing is:',
          100 * round(smape_loss(y_test, y_pred3), 3), '%')
    print('SMAPE Error for Theta is:',
          100 * round(smape_loss(y_test, y_pred4), 3), '%')
    print('SMAPE Error for Ensemble is:',
          100 * round(smape_loss(y_test, y_pred5), 3), '%')
def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """
    Main function for build forecasting model on selected customer and time interval, save the model and plotting

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp

    customer_id: int

    start: string
        start year and month in '2020-01' format

    end: string
        end year and month in '2020-01' format *** this month will not be included ***

    model_type:
        type of model to use in forecasting
        select from : ['NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster', 'KNeighborsRegressor',
                       'ExponentialSmoothing', 'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']

    test_size_month:
        number of month that will be excluded from end of interval to use as test dataset

    model_storage_path: string
        the folder that you want to store saved models
    Returns
    -------
    sMAPE Loss: print

    plot: matplotlib figure
        plot train, test and predicted values
    """
    y_train, y_test = temporal_train_test_split(prepare_data(data,
                                                             customer_id,
                                                             start=start,
                                                             end=end),
                                                test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    if model_type == 'NaiveForecaster':
        forecaster = NaiveForecaster(strategy="last", sp=12)
    elif model_type == 'PolynomialTrendForecaster':
        forecaster = PolynomialTrendForecaster(degree=2)
    elif model_type == 'ThetaForecaster':
        forecaster = ThetaForecaster(sp=6)
    elif model_type == 'KNeighborsRegressor':
        regressor = KNeighborsRegressor(n_neighbors=1)
        forecaster = ReducedRegressionForecaster(regressor=regressor,
                                                 window_length=12,
                                                 strategy="recursive")
    elif model_type == 'ExponentialSmoothing':
        forecaster = ExponentialSmoothing(trend="add",
                                          seasonal="multiplicative",
                                          sp=12)
    elif model_type == 'AutoETS':
        forecaster = AutoETS(auto=True, sp=12, n_jobs=-1)
    elif model_type == 'AutoARIMA':
        forecaster = AutoARIMA(sp=12, suppress_warnings=True)
    elif model_type == 'TBATS':
        forecaster = TBATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'BATS':
        forecaster = BATS(sp=12, use_trend=True, use_box_cox=False)
    elif model_type == 'EnsembleForecaster':
        forecaster = EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ])

    try:
        forecaster.fit(y_train)
    except:
        forecaster.fit(y_train + 1)

    y_pred = forecaster.predict(fh)
    dump(
        forecaster,
        f'{model_storage_path}/{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model'
    )

    print('sMAPE Loss :', smape_loss(y_pred, y_test))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
Ejemplo n.º 10
0
def genforecast(data):
    from sktime.forecasting.model_selection import temporal_train_test_split
    import numpy as np
    import math
    y_train, y_test = temporal_train_test_split(data)
    fh = np.arange(1, len(y_test) + 1)
    testct = len(y_test)

    from sktime.forecasting.naive import NaiveForecaster
    forecaster = NaiveForecaster(strategy="drift")
    forecaster.fit(y_train)
    y_pred_naive = forecaster.predict(fh)
    from sktime.performance_metrics.forecasting import smape_loss
    naive_acc = round(smape_loss(y_pred_naive, y_test), 4)
    #full model dev and forecast next 5 days
    forecaster.fit(data)
    futurewin = np.arange(1, 6)  # 5 day in future prediction
    fut_pred = forecaster.predict(futurewin)
    min_naive = round(min(fut_pred), 2)
    max_naive = round(max(fut_pred), 2)

    from sktime.forecasting.trend import PolynomialTrendForecaster
    forecaster = PolynomialTrendForecaster(degree=1)
    forecaster.fit(y_train)
    y_pred_poly = forecaster.predict(fh)
    from sktime.performance_metrics.forecasting import smape_loss
    poly_acc = round(smape_loss(y_pred_poly, y_test), 4)
    #full model dev and forecast next 5 days
    forecaster.fit(data)
    futurewin = np.arange(1, 6)  # 5 day in future prediction
    fut_pred = forecaster.predict(futurewin)
    min_poly = round(min(fut_pred), 2)
    max_poly = round(max(fut_pred), 2)

    from sktime.forecasting.compose import EnsembleForecaster
    from sktime.forecasting.exp_smoothing import ExponentialSmoothing
    sp1 = math.floor(len(y_test) / 4)
    sp2 = min(sp1, 12)
    spval = max(2, sp2)
    forecaster = EnsembleForecaster([
        ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=spval)),
        ("holt",
         ExponentialSmoothing(trend="add",
                              damped=False,
                              seasonal="multiplicative",
                              sp=spval)),
        ("damped",
         ExponentialSmoothing(trend="add",
                              damped=True,
                              seasonal="multiplicative",
                              sp=spval))
    ])
    forecaster.fit(y_train)
    y_pred_ensem = forecaster.predict(fh)
    ensem_acc = round(smape_loss(y_test, y_pred_ensem), 4)
    #full model dev and forecast next 5 days
    forecaster.fit(data)
    futurewin = np.arange(1, 6)  # 5 day in future prediction
    fut_pred = forecaster.predict(futurewin)
    min_ensem = round(min(fut_pred), 2)
    max_ensem = round(max(fut_pred), 2)

    from sklearn.neighbors import KNeighborsRegressor
    regressor = KNeighborsRegressor(n_neighbors=1)
    from sktime.forecasting.compose import ReducedRegressionForecaster
    forecaster = ReducedRegressionForecaster(regressor=regressor,
                                             window_length=15,
                                             strategy="recursive")
    param_grid = {"window_length": [5, 10, 15]}
    from sktime.forecasting.model_selection import SlidingWindowSplitter
    from sktime.forecasting.model_selection import ForecastingGridSearchCV
    # we fit the forecaster on the initial window, and then use temporal cross-validation to find the optimal parameter
    cv = SlidingWindowSplitter(initial_window=int(len(y_train) * 0.5))
    gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=param_grid)
    gscv.fit(y_train)
    y_pred_redreg = gscv.predict(fh)
    redreg_acc = round(smape_loss(y_test, y_pred_redreg), 4)
    #full model dev and forecast next 5 days
    gscv.fit(data)
    futurewin = np.arange(1, 6)  # 5 day in future prediction
    fut_pred = gscv.predict(futurewin)
    min_redreg = round(min(fut_pred), 2)
    max_redreg = round(max(fut_pred), 2)

    return min_naive, max_naive, min_poly, max_poly, min_ensem, max_ensem, min_redreg, max_redreg, y_test, testct, y_pred_naive, naive_acc, y_pred_poly, poly_acc, y_pred_ensem, ensem_acc, y_pred_redreg, redreg_acc
Ejemplo n.º 11
0
    sktime提供了用于组合模型构建的模块化API,以进行预测。 
    
    * Ensembling
    像scikit-learn一样,sktime提供了一个元预测器来集成多种预测算法。
    例如,我们可以如下组合指数平滑的不同变体:
''')

from sktime.forecasting.compose import EnsembleForecaster

forecaster = EnsembleForecaster([
    ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
    ("holt",
     ExponentialSmoothing(trend="add",
                          damped=False,
                          seasonal="multiplicative",
                          sp=12)),
    ("damped",
     ExponentialSmoothing(trend="add",
                          damped=True,
                          seasonal="multiplicative",
                          sp=12))
])
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))

st.write('''
    * Tuning
    In the `ReducedRegressionForecaster`,