Beispiel #1
0
def forecasting_autoarima(y_train, y_test, s):
    """Fit an AutoARIMA model on ``y_train`` and plot its forecast.

    The forecast spans one step per observation in ``y_test``. The training
    series, the test series and the prediction are drawn with ``plot_ys`` and
    the figure is then handed to ``st.pyplot``.

    Parameters
    ----------
    y_train :
        Series the forecaster is fitted on.
    y_test :
        Held-out series; its length sets the forecasting horizon and it is
        plotted next to the prediction.
    s :
        Value passed to ``AutoARIMA`` as ``sp``.
    """
    # Relative horizon 1, 2, ..., len(y_test).
    horizon = np.arange(1, len(y_test) + 1)

    model = AutoARIMA(sp=s)
    model.fit(y_train)
    prediction = model.predict(horizon)

    plot_ys(
        y_train,
        y_test,
        prediction,
        labels=["y_train", "y_test", "y_pred"],
    )
    st.pyplot()
Beispiel #2
0
def test_multiplex_or_dunder():
    """Check that the "|" operator builds MultiplexForecaster objects.

    Three combinations are exercised: forecaster | forecaster,
    MultiplexForecaster | MultiplexForecaster, and a chain of three
    forecasters of the same class, which must end up with three distinct
    estimator names. Finally, "|" with a non-forecaster operand is expected
    to raise ``TypeError``.
    """
    # forecaster | forecaster
    pair = AutoETS() | NaiveForecaster()
    assert isinstance(pair, MultiplexForecaster)
    assert len(pair.forecasters) == 2

    # MultiplexForecaster | MultiplexForecaster
    multiplex_one = MultiplexForecaster(
        [("arima", AutoARIMA()), ("ets", AutoETS())]
    )
    multiplex_two = MultiplexForecaster(
        [("theta", ThetaForecaster()), ("naive", NaiveForecaster())]
    )
    merged = multiplex_one | multiplex_two
    assert isinstance(merged, MultiplexForecaster)
    assert len(merged.forecasters) == 4

    # Three forecasters of the same class: covers MultiplexForecaster |
    # forecaster and the de-duplication of estimator names.
    triple = (
        NaiveForecaster(strategy="last")
        | NaiveForecaster(strategy="mean")
        | NaiveForecaster(strategy="drift")
    )
    assert isinstance(triple, MultiplexForecaster)
    assert len(triple.forecasters) == 3
    estimator_names = triple._get_estimator_names(triple.forecasters)
    assert len(set(estimator_names)) == 3

    # Any other right-hand operand must be rejected with a TypeError.
    with pytest.raises(TypeError):
        multiplex_one | "this shouldn't work"
Beispiel #3
0
def construct_M4_forecasters(sp, fh):
    """Build the named collection of forecasters used for the M4 comparison.

    Parameters
    ----------
    sp :
        Seasonal periodicity. When ``sp > 1`` the deseasonalising steps
        receive ``model=SEASONAL_MODEL`` and ``sp``; otherwise they are
        created without extra keyword arguments.
    fh :
        Accepted for interface compatibility; not used inside this function.

    Returns
    -------
    dict
        Maps a forecaster name to an unfitted forecaster object.
    """
    if sp > 1:
        kwargs = {"model": SEASONAL_MODEL, "sp": sp}
    else:
        kwargs = {}

    # Theta on Box-Cox transformed, conditionally deseasonalised data.
    conditional_deseasonalizer = ConditionalDeseasonalizer(
        seasonality_test=seasonality_test_R, **kwargs
    )
    box_cox = BoxCoxTransformer(bounds=(0, 1))
    plain_theta = ThetaForecaster(deseasonalise=False)
    theta_bc = make_pipeline(conditional_deseasonalizer, box_cox, plain_theta)

    # The MLP and RNN pipelines below are disabled: they live inside a bare
    # string literal, which is evaluated and discarded at runtime.
    """
    MLP = make_pipeline(
        ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
                                  **kwargs),
        Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
        RecursiveRegressionForecaster(
            regressor=MLPRegressor(hidden_layer_sizes=6, activation="identity",
                                   solver="adam", max_iter=100,
                                   learning_rate="adaptive",
                                   learning_rate_init=0.001),
            window_length=3)
    )
    RNN = make_pipeline(
        ConditionalDeseasonalizer(seasonality_test=seasonality_test_Python,
                                  **kwargs),
        Detrender(PolynomialTrendForecaster(degree=1, with_intercept=True)),
        RecursiveTimeSeriesRegressionForecaster(
            regressor=SimpleRNNRegressor(nb_epochs=100),
            window_length=3)
    )
    """

    # Entries are added one by one, in the same order as the result dict.
    forecasters = {}
    forecasters["Naive"] = NaiveForecaster(strategy="last")
    forecasters["sNaive"] = NaiveForecaster(strategy="seasonal_last", sp=sp)
    forecasters["Naive2"] = deseasonalise(
        NaiveForecaster(strategy="last"), **kwargs
    )
    forecasters["SES"] = deseasonalise(ses, **kwargs)
    forecasters["Holt"] = deseasonalise(holt, **kwargs)
    forecasters["Damped"] = deseasonalise(damped, **kwargs)
    forecasters["Theta"] = deseasonalise(
        ThetaForecaster(deseasonalise=False), **kwargs
    )
    forecasters["ARIMA"] = AutoARIMA(
        suppress_warnings=True, error_action="ignore", sp=sp
    )
    forecasters["Com"] = deseasonalise(
        EnsembleForecaster(
            [("ses", ses), ("holt", holt), ("damped", damped)]
        ),
        **kwargs,
    )
    # "MLP" and "RNN" entries are intentionally left out (see above).
    forecasters["260"] = theta_bc
    return forecasters
Beispiel #4
0
    def build_forecaster(self):
        """Instantiate the forecaster selected by ``self.type``.

        Returns
        -------
        object
            ``AutoARIMA(sp=7, suppress_warnings=True)`` for ``'auto_arima'``,
            ``Prophet()`` for ``'prophet'``, or a ``DeepAREstimator`` built
            from ``self.horizon`` and ``self.freq`` for ``'deepar'``.

        Raises
        ------
        NotImplementedError
            If ``self.type`` is none of the supported model names.
        """
        if self.type == 'auto_arima':
            return AutoARIMA(sp=7, suppress_warnings=True)

        if self.type == 'prophet':
            return Prophet()

        if self.type == 'deepar':
            trainer = Trainer(ctx="cpu",
                              epochs=15,
                              learning_rate=1e-3,
                              num_batches_per_epoch=100)
            return DeepAREstimator(prediction_length=self.horizon,
                                   freq=self.freq,
                                   trainer=trainer)

        # Interpolate the requested model name. The bare name ``type`` is the
        # builtin and would render as "<class 'type'>" in the message.
        raise NotImplementedError(
            f'Model {self.type} is currently not implemented')
Beispiel #5
0
def train_model_autoarima(y, x, output: bool = True) -> AutoARIMA:
    """Fit an AutoARIMA model on ``y`` with ``x`` shifted by one step.

    The first element of ``y`` and the last row of ``x`` are dropped before
    fitting, so the i-th remaining target is paired with the regressor row
    that preceded it in the original inputs.

    Parameters
    ----------
    y :
        Target values; passed to ``np.delete`` to remove element 0.
    x :
        Exogenous values; sliced with ``[:-1]`` and wrapped in a DataFrame.
    output :
        When true, progress is logged and ``model.summary()`` is called.

    Returns
    -------
    AutoARIMA
        The fitted model.
    """
    if output:
        logger.info("Training AutoARIMA model...")
        timer = Timer()

    model = AutoARIMA(suppress_warnings=True, error_action='ignore')

    target = pd.Series(data=np.delete(y, 0))
    exog = pd.DataFrame(data=x[:-1])
    model.fit(target, exog)

    if not output:
        return model

    # NOTE(review): the return value of summary() is discarded here --
    # confirm whether it was meant to be logged or printed.
    model.summary()
    logger.info(f'Done in {timer}')
    return model
Beispiel #6
0
def test_auto_arima():
    """Check the prediction index for absolute horizons (issue 805).

    For a daily and a two-daily date index, AutoARIMA is fitted on the first
    five observations with exogenous data and asked to predict the remaining
    three timestamps; the index of the prediction must equal those timestamps.

    https://github.com/alan-turing-institute/sktime/issues/805#issuecomment-891848228.
    """
    # (index frequency, first timestamp expected in the prediction)
    scenarios = (
        ("1D", "January 6, 2021"),
        ("2D", "January 11, 2021"),
    )

    for freq, first_predicted in scenarios:
        time_index = pd.date_range("January 1, 2021", periods=8, freq=freq)
        X = pd.DataFrame(
            np.random.randint(0, 4, 24).reshape(8, 3),
            columns=["First", "Second", "Third"],
            index=time_index,
        )
        y = pd.Series([1, 3, 2, 4, 5, 2, 3, 1], index=time_index)

        # Absolute horizon: the last three timestamps of the index.
        horizon = ForecastingHorizon(X.index[5:], is_relative=False)

        forecaster = AutoARIMA(start_p=2, start_q=2, max_p=5, max_q=5)
        fitted = forecaster.fit(X=X[:5], y=y[:5])
        y_pred_sk = fitted.predict(fh=horizon, X=X[5:])

        expected_index = pd.date_range(first_predicted, periods=3, freq=freq)
        pd.testing.assert_index_equal(y_pred_sk.index, expected_index)
Beispiel #7
0
def main():
    """Compare several forecasters on the airline dataset.

    Loads the airline series, holds out 36 observations for testing, fits a
    seasonal naive forecaster, AutoARIMA, exponential smoothing, Theta and an
    ensemble of those four model types, plots every forecast against the
    train/test data and prints each model's sMAPE as a percentage.
    """
    # Univariate, monthly records from 1949 to 60 (144 records)
    df = datasets.load_airline()
    # 36 months for testing
    y_train, y_test = temporal_train_test_split(df, test_size=36)

    # Forecast horizon: array with the same length as y_test (steps 1..n).
    fh = np.arange(1, len(y_test) + 1)

    # Model strategy: last, mean, seasonal_last. sp=12 months (yearly season).
    forecaster = NaiveForecaster(strategy='seasonal_last', sp=12)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)

    forecaster2 = AutoARIMA(sp=12, suppress_warnings=True, trace=1)
    forecaster2.fit(y_train)
    y_pred2 = forecaster2.predict(fh)

    # `damped` is a boolean switch; the original passed the string 'True',
    # which only worked because a non-empty string is truthy.
    forecaster3 = ExponentialSmoothing(trend='add',
                                       damped=True,
                                       seasonal='multiplicative',
                                       sp=12)
    forecaster3.fit(y_train)
    y_pred3 = forecaster3.predict(fh)

    forecaster4 = ThetaForecaster(sp=12)
    forecaster4.fit(y_train)
    y_pred4 = forecaster4.predict(fh)

    # The ensemble gets its own fresh instances of the four model types.
    forecaster5 = EnsembleForecaster([
        ('NaiveForecaster', NaiveForecaster(strategy='seasonal_last', sp=12)),
        ('AutoARIMA', AutoARIMA(sp=12, suppress_warnings=True)),
        ('Exp Smoothing',
         ExponentialSmoothing(trend='add',
                              damped=True,
                              seasonal='multiplicative',
                              sp=12)),
        ('Theta', ThetaForecaster(sp=12)),
    ])
    forecaster5.fit(y_train)
    y_pred5 = forecaster5.predict(fh)

    plot_ys(y_train,
            y_test,
            y_pred,
            y_pred2,
            y_pred3,
            y_pred4,
            y_pred5,
            labels=[
                'Train', 'Test', 'Naive Forecaster', 'AutoARIMA',
                'Exp Smoothing', 'Theta', 'Ensemble'
            ])
    plt.xlabel('Months')
    plt.ylabel('Number of flights')
    plt.title(
        'Time series of the number of international flights in function of time'
    )
    plt.show()

    predictions = [
        ('NaiveForecaster', y_pred),
        ('AutoARIMA', y_pred2),
        ('Exp Smoothing', y_pred3),
        ('Theta', y_pred4),
        ('Ensemble', y_pred5),
    ]
    for model_name, prediction in predictions:
        # Scale to percent before rounding: `100 * round(x, 3)` can print
        # float artefacts such as 14.499999999999998.
        percent = round(100 * smape_loss(y_test, prediction), 1)
        print(f'SMAPE Error for {model_name} is:', percent, '%')
Beispiel #8
0

# Exploratory script: fit an additive exponential-smoothing model and inspect
# it. NOTE(review): y_train, y_test and fh are not defined in this excerpt --
# presumably they come from earlier cells of the same session.
ets_frcstr = ExponentialSmoothing(trend='additive', seasonal='additive', sp=12)

ets_frcstr.fit(y_train)



y_pred = ets_frcstr.predict(fh)
# The plot labels are runtime strings and are left exactly as written.
plot_series(y_train, y_test, y_pred, labels=['Обучающая', 'т', 'п'])

# Bare expressions: their results are discarded when this runs as a script
# (they only display something in an interactive session).
ets_frcstr.get_fitted_params()
ets_frcstr.get_params()

smape_loss(y_test, y_pred)

auto_ets_frr = AutoETS()
# NOTE(review): this fits AutoETS on the forecast (y_pred), not on y_train --
# confirm that is intended.
auto_ets_frr.fit(y_pred)


auto_ets_frr.summary()

# NOTE(review): the AutoARIMA instance is immediately replaced by ARIMA(), so
# the first assignment has no lasting effect.
arima_frr = AutoARIMA()
arima_frr = ARIMA()

# Explicit (1, 1, 0) model with seasonal order (0, 1, 0, 12); it is only
# constructed here, not fitted.
forecaster = ARIMA(
    order=(1, 1, 0), seasonal_order=(0, 1, 0, 12), suppress_warnings=True
)


def _make_forecaster(model_type):
    """Return an unfitted forecaster for ``model_type``; raise ValueError if unknown."""
    # Zero-argument factories so that only the requested model is constructed.
    factories = {
        'NaiveForecaster':
        lambda: NaiveForecaster(strategy="last", sp=12),
        'PolynomialTrendForecaster':
        lambda: PolynomialTrendForecaster(degree=2),
        'ThetaForecaster':
        lambda: ThetaForecaster(sp=6),
        'KNeighborsRegressor':
        lambda: ReducedRegressionForecaster(
            regressor=KNeighborsRegressor(n_neighbors=1),
            window_length=12,
            strategy="recursive"),
        'ExponentialSmoothing':
        lambda: ExponentialSmoothing(trend="add",
                                     seasonal="multiplicative",
                                     sp=12),
        'AutoETS':
        lambda: AutoETS(auto=True, sp=12, n_jobs=-1),
        'AutoARIMA':
        lambda: AutoARIMA(sp=12, suppress_warnings=True),
        'TBATS':
        lambda: TBATS(sp=12, use_trend=True, use_box_cox=False),
        'BATS':
        lambda: BATS(sp=12, use_trend=True, use_box_cox=False),
        'EnsembleForecaster':
        lambda: EnsembleForecaster([
            ("ses", ExponentialSmoothing(seasonal="multiplicative", sp=12)),
            (
                "holt",
                ExponentialSmoothing(trend="add",
                                     damped_trend=False,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
            (
                "damped",
                ExponentialSmoothing(trend="add",
                                     damped_trend=True,
                                     seasonal="multiplicative",
                                     sp=12),
            ),
        ]),
    }
    try:
        factory = factories[model_type]
    except KeyError:
        supported = ', '.join(factories)
        raise ValueError(
            f'Unknown model_type {model_type!r}; expected one of: {supported}'
        ) from None
    return factory()


def forecast(data,
             customer_id,
             start='2017-01',
             end='2019-04',
             model_type='NaiveForecaster',
             test_size_month=5,
             model_storage_path=''):
    """
    Build a forecasting model for one customer and time interval, save the
    fitted model, print its sMAPE and plot the result.

    Parameters
    ----------
    data: pandas DataFrame
        main dataset with customer_id, product_id and Timestamp

    customer_id: int

    start: string
        start year and month in '2020-01' format

    end: string
        end year and month in '2020-01' format *** this month will not be included ***

    model_type:
        type of model to use in forecasting
        select from : ['NaiveForecaster', 'PolynomialTrendForecaster', 'ThetaForecaster', 'KNeighborsRegressor',
                       'ExponentialSmoothing', 'AutoETS', 'AutoARIMA', 'TBATS', 'BATS', 'EnsembleForecaster']

    test_size_month:
        number of months excluded from the end of the interval and used as the test dataset

    model_storage_path: string
        the folder in which the fitted model is stored; the default '' stores
        it relative to the current working directory

    Returns
    -------
    plot:
        whatever ``plot_series`` returns for the train, test and predicted values

    Raises
    ------
    ValueError
        If ``model_type`` is not one of the supported names.
    """
    import os  # local import: the file-level import block is not visible here

    # Resolve the model first so an unsupported name fails fast with a clear
    # error instead of an UnboundLocalError after the data has been prepared.
    forecaster = _make_forecaster(model_type)

    y_train, y_test = temporal_train_test_split(prepare_data(data,
                                                             customer_id,
                                                             start=start,
                                                             end=end),
                                                test_size=test_size_month)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    try:
        forecaster.fit(y_train)
    except Exception:
        # Best-effort retry on data shifted by one -- presumably for models
        # that reject non-positive values. `except Exception` (not a bare
        # except) lets KeyboardInterrupt/SystemExit propagate.
        # NOTE(review): predictions from the shifted fit are not shifted back.
        forecaster.fit(y_train + 1)

    y_pred = forecaster.predict(fh)

    # os.path.join avoids the leading "/" (filesystem root) that plain string
    # formatting produced when model_storage_path is the empty default.
    model_file = (
        f'{customer_id}_{model_type}_{start}_{end}_{test_size_month}.model')
    dump(forecaster, os.path.join(model_storage_path, model_file))

    # Arguments in (y_true, y_pred) order, matching the rest of the file.
    print('sMAPE Loss :', smape_loss(y_test, y_pred))
    plot = plot_series(y_train,
                       y_test,
                       y_pred,
                       labels=["y_train", "y_test", "y_pred"])
    return plot
# Script: load the a10 dataset, fit a seasonal AutoARIMA and keep its summary.
plt.rcParams["figure.figsize"] = [16, 7]
# for fancy plots
plt.style.use('ggplot')

# Read the CSV with its 'date' column parsed and used as the index.
df = pd.read_csv(
    'https://raw.githubusercontent.com/selva86/datasets/master/a10.csv',
    parse_dates=['date'],
    index_col="date")

# Replace the datetime index with a monthly PeriodIndex.
df.index = pd.PeriodIndex(df.index, freq="M")

# First row of the transposed frame, i.e. the first column of df as a Series.
series = df.T.iloc[0]

plot_series(series)

# Fit on the full series with sp=12.
model_auto = AutoARIMA(sp=12, suppress_warnings=True).fit(series)

# Summary object of the fitted model; its text form is parsed further below.
summary = model_auto.summary()


def get_params(summary_text):
    """Extract the ARIMA orders from a model summary text.

    Looks for ``SARIMAX(p, d, q)x(P, D, Q, S)`` in ``summary_text``. When the
    seasonal ``x(...)`` part is absent, the four seasonal values are reported
    as zeros so the result always has seven entries for such a header.

    Parameters
    ----------
    summary_text : str
        Text of the model summary.

    Returns
    -------
    list of int
        ``[p, d, q, P, D, Q, S]``.

    Raises
    ------
    ValueError
        If no ``SARIMAX(...)`` specification is found.
    """
    # The seasonal group is optional so a non-seasonal header still matches.
    match = re.search(r'SARIMAX\((.*?)\)(?:x\((.*?)\))?', summary_text)
    if match is None:
        raise ValueError('no SARIMAX(...) specification found in summary text')

    order = [int(token) for token in re.findall(r'\d+', match.group(1))]
    seasonal = [
        int(token) for token in re.findall(r'\d+', match.group(2) or '')
    ]
    return order + (seasonal or [0, 0, 0, 0])


# Unpack the parsed orders; this requires get_params to return exactly seven
# integers.
p, d, q, P, D, Q, S = get_params(summary.as_text())

# Hold out 24 observations of the series as the test set.
y_train, y_test = temporal_train_test_split(series, test_size=24)

# Absolute forecasting horizon built from the test set's index.
fh = ForecastingHorizon(y_test.index, is_relative=False)
Beispiel #11
0
                                  sp=12)
# Streamlit walkthrough. NOTE(review): `forecaster`, y_train, y_test and fh
# are created above this excerpt (the preceding statement is cut off).
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))

# Explanatory text shown in the app (runtime string, left untouched).
st.write('''
    另一个常见模型是ARIMA模型。 
    在sktime中,我们连接pmdarima,这是一个用于自动选择最佳ARIMA模型的软件包。 
    这是因为搜索了许多可能的模型参数,因此可能需要更长的时间。
''')

from sktime.forecasting.arima import AutoARIMA

# Same fit / predict / plot / score sequence, now with AutoARIMA (sp=12).
forecaster = AutoARIMA(sp=12, suppress_warnings=True)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
st.pyplot()
st.write("smape_loss(y_test, y_pred):", smape_loss(y_test, y_pred))

# Section header and introduction for the next part of the app (runtime
# string, left untouched).
st.write('''
    ### 4.3 Compositite model building
    sktime提供了用于组合模型构建的模块化API,以进行预测。 
    
    * Ensembling
    像scikit-learn一样,sktime提供了一个元预测器来集成多种预测算法。
    例如,我们可以如下组合指数平滑的不同变体:
''')
# Notebook-style script: reduction-based forecasting, AutoARIMA, then the
# start of a classification example. NOTE(review): y_train, y_test, fh,
# plot_ys and ReducedRegressionForecaster are not defined or imported in this
# excerpt -- presumably they come from earlier cells.
from sklearn.ensemble import RandomForestRegressor
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.performance_metrics.forecasting import smape_loss

# Forecast with a random forest wrapped in a reduction forecaster that uses a
# window of 12 observations.
regressor = RandomForestRegressor()
forecaster = ReducedRegressionForecaster(regressor, window_length=12)
forecaster.fit(y_train)
y_pred = forecaster.predict(fh)

plot_ys(y_train, y_test, y_pred, labels=['y_train', 'y_test', 'y_pred'])
# Bare expression: the score is only displayed in an interactive session.
smape_loss(y_test, y_pred)

# The bare string below is a section title kept as a no-op string statement.
"""Forcasting with autoarima"""

from sktime.forecasting.arima import AutoARIMA
forecaster = AutoARIMA(sp=12)
forecaster.fit(y_train)

y_pred = forecaster.predict(fh)
plot_ys(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"]);
smape_loss(y_test, y_pred)

# Section title for the classification example (no-op string statement).
"""Time Series Classification"""

from sktime.datasets import load_arrow_head
from sktime.classification.compose import TimeSeriesForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the arrow-head data as (X, y) and split it.
# NOTE: this rebinds y_train / y_test, replacing the forecasting series above.
X, y = load_arrow_head(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y)