Beispiel #1
0
def test_pipeline_behavior():
    """Exercise fit, predict, kwarg validation and update on a two-step pipeline.

    The pipeline chains a ``FourierFeaturizer(m=12)`` with a non-seasonal
    ``AutoARIMA`` and is fitted on the first 125 observations returned by
    ``load_wineind``; the remaining observations are used for ``update``.
    """
    series = load_wineind()
    train = series[:125]
    test = series[125:]

    featurizer = FourierFeaturizer(m=12)
    arima = AutoARIMA(
        seasonal=False,
        stepwise=True,
        suppress_warnings=True,
        maxiter=3,
        error_action='ignore',
    )
    pipeline = Pipeline([("fourier", featurizer), ("arima", arima)])

    # The pipeline reports one entry per step.
    assert len(pipeline) == 2

    pipeline.fit(train)
    forecast = pipeline.predict(5)
    assert forecast.shape[0] == 5

    # The fitted model's ``fit_with_exog_`` flag must be truthy.
    assert pipeline._final_estimator.model_.fit_with_exog_

    # Manually (and incorrectly) overriding n_periods for the fourier step
    # must surface as a ValueError that mentions 'n_periods'.
    with pytest.raises(ValueError) as ve:
        pipeline.predict(3, fourier__n_periods=10)
    assert "'n_periods'" in pytest_error_str(ve)

    # Updating with the held-out observations must succeed...
    pipeline.update(test, maxiter=5)

    # ...and leave the fourier step's ``n_`` equal to the full series length.
    fourier_step = pipeline.steps_[0][1]
    assert fourier_step.n_ == series.shape[0]
Beispiel #2
0
def train_arima_model(data_train, date_init, date_fin, op_red, type_day,
                      transform='decompose-Fourier', type_decompose='additive',
                      n_decompose=1, n_coeff_fourier=4, filter_decompose=None):
    """Fit an AutoARIMA pipeline for one series and persist it via ``save_model_dir``.

    ``transform`` selects how the pipeline is built and what it is fitted on:

    * ``'decompose-Fourier'`` / ``'decompose-Fourier-log'``: Fourier
      featurizer followed by a non-seasonal AutoARIMA, fitted on the
      ``trend_residual`` value returned by ``get_transform_model``.
    * ``'Fourier'``: the same two-step pipeline, fitted on the NaN-free input.
    * ``'decompose'``: a seasonal AutoARIMA fitted on ``trend_residual``.
    * ``'normal'``: a seasonal AutoARIMA fitted on the NaN-free input.

    Args:
        data_train: Object exposing a ``name`` attribute and convertible with
            ``np.array``; NaN entries are dropped before any further use.
        date_init: Forwarded unchanged to ``save_model_dir``.
        date_fin: Forwarded unchanged to ``save_model_dir``.
        op_red: Forwarded unchanged to ``save_model_dir``.
        type_day: Forwarded unchanged to ``save_model_dir``.
        transform: One of the transform names listed above.
        type_decompose: Forwarded to ``get_transform_model`` and
            ``save_model_dir`` by the decomposition branches.
        n_decompose: Forwarded to ``get_transform_model``; passed to
            ``save_model_dir`` as a string by the decomposition branches.
        n_coeff_fourier: Forwarded to ``get_transform_model`` by the
            Fourier branches.
        filter_decompose: Not used by this function.

    Returns:
        None. The fitted pipeline is handed to ``save_model_dir``.

    Raises:
        ValueError: If ``transform`` is not one of the supported names.
    """
    num_cluster = data_train.name
    values = np.array(data_train)
    series = values[~np.isnan(values)]
    type_model = 'arima'

    # Search settings shared by every AutoARIMA built below.
    arima_common = dict(trace=True, error_action='ignore', maxiter=30, max_p=4, max_q=4,
                        suppress_warnings=True, with_intercept=True)

    if transform in ('decompose-Fourier', 'decompose-Fourier-log'):
        print('n_decompose: ', n_decompose, 'n_coeff_fourier: ', n_coeff_fourier)
        _seasonal, trend_residual, n_diffs, periods_decompose, m_f, k_f = get_transform_model(
            series,
            transform=transform,
            type_decompose=type_decompose,
            n_decompose=n_decompose,
            n_coeff_fourier=n_coeff_fourier,
        )
        fourier_step = ppc.FourierFeaturizer(m=m_f, k=k_f)
        arima_step = pm.AutoARIMA(d=n_diffs, seasonal=False, **arima_common)
        fitted = Pipeline([('fourier', fourier_step), ("model", arima_step)])
        print('\t\t\t training model...')
        fitted.fit(trend_residual)
        print(fitted.summary())
        print('\t\t\t saving model...')
        save_model_dir(fitted, transform, num_cluster, op_red, type_day, type_model,
                       date_init, date_fin, periods_decompose, str(n_decompose), type_decompose)
        print('\t\t\t finish save model...')

    elif transform == 'Fourier':
        n_diffs, m_f, k_f = get_transform_model(series, transform=transform,
                                                n_coeff_fourier=n_coeff_fourier)
        fourier_step = ppc.FourierFeaturizer(m=m_f, k=k_f)
        arima_step = pm.AutoARIMA(d=n_diffs, seasonal=False, **arima_common)
        fitted = Pipeline([('fourier', fourier_step), ("model", arima_step)])
        fitted.fit(series)
        save_model_dir(fitted, transform, num_cluster, op_red, type_day, type_model,
                       date_init, date_fin)

    elif transform == 'decompose':
        _seasonal, trend_residual, n_diffs, ns_diffs, periods_decompose, m_f = get_transform_model(
            series,
            transform=transform,
            type_decompose=type_decompose,
            n_decompose=n_decompose,
        )
        arima_step = pm.AutoARIMA(d=n_diffs, D=ns_diffs, seasonal=True, m=m_f, **arima_common)
        fitted = Pipeline([("model", arima_step)])
        fitted.fit(trend_residual)
        save_model_dir(fitted, transform, num_cluster, op_red, type_day, type_model,
                       date_init, date_fin, periods_decompose, str(n_decompose), type_decompose)

    elif transform == 'normal':
        n_diffs, ns_diffs, m_f = get_transform_model(series, transform=transform)
        arima_step = pm.AutoARIMA(d=n_diffs, D=ns_diffs, seasonal=True, m=m_f, **arima_common)
        fitted = Pipeline([("model", arima_step)])
        fitted.fit(series)
        save_model_dir(fitted, transform, num_cluster, op_red, type_day, type_model,
                       date_init, date_fin)

    else:
        raise ValueError('invalid variable transform {}.'.format(transform))
import pmdarima as pm
from pmdarima.model_selection import train_test_split
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import BoxCoxEndogTransformer
import pickle

# Load the sunspots series and split it with a training size of 2700
y = pm.datasets.load_sunspots()
train, test = train_test_split(y, train_size=2700)

# File the fitted pipeline is pickled to and read back from
MODEL_PATH = 'model.pkl'

# Build the two steps separately, then chain them: a Box-Cox transform of
# the endogenous series followed by a seasonal auto-ARIMA search
boxcox_step = BoxCoxEndogTransformer(lmbda2=1e-6)  # lmbda2 avoids negative values
arima_step = pm.AutoARIMA(seasonal=True, m=12, suppress_warnings=True, trace=True)
pipeline = Pipeline([('boxcox', boxcox_step), ('arima', arima_step)])
pipeline.fit(train)

# Persist the fitted pipeline with pickle, as you would a scikit estimator
with open(MODEL_PATH, 'wb') as sink:
    pickle.dump(pipeline, sink)

# Restore it from disk and forecast 15 periods ahead
with open(MODEL_PATH, 'rb') as source:
    mod = pickle.load(source)
    print(mod.predict(15))
# [25.20580375 25.05573898 24.4263037  23.56766793 22.67463049 21.82231043
# 21.04061069 20.33693017 19.70906027 19.1509862  18.6555793  18.21577243
# 17.8250318  17.47750614 17.16803394]
Beispiel #4
0
    def forecast(self, forecast_horizon: int = 96):
        """Fit a Box-Cox + seasonal AutoARIMA pipeline and store its forecasts.

        The pipeline is fitted on ``self.training_data['close']``, pickled to
        ``intermediates/arima_<currency_pair>.pkl``, reloaded from that file,
        and then asked for ``forecast_horizon`` predictions together with
        their confidence intervals. The results are stored on the instance
        (``forecasts``, ``errors``, ``forecasts_lower``, ``forecasts_upper``,
        ``forecasts_raw``), written to a CSV file under ``output/`` and
        printed.

        Args:
            forecast_horizon: Number of periods to predict.

        Returns:
            None. Results are exposed through instance attributes and the
            CSV file.
        """
        super().forecast(forecast_horizon)

        pair = self.currency_pair.upper()
        # Interpolate the values into the message; passing them to print()
        # as extra arguments would leave the "{}" placeholders unfilled.
        print(
            f"Running ARIMA forecast for Currency-pair: {pair} "
            f"using forecast horizon: {forecast_horizon}")
        print("Dataset: ", pair)
        print(self.training_data.head(5))
        print(".....\t.........\t...")
        print(self.training_data.tail(5))

        # define and fit the pipeline/model
        pipeline = Pipeline([('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)),
                             ('arima',
                              pm.AutoARIMA(start_p=1,
                                           start_q=1,
                                           max_p=3,
                                           max_q=3,
                                           d=1,
                                           D=1,
                                           start_P=0,
                                           error_action='ignore',
                                           suppress_warnings=True,
                                           stepwise=True,
                                           seasonal=True,
                                           m=12,
                                           trace=True))])
        pipeline.fit(self.training_data['close'])

        # serialize model
        model_file = f"intermediates/arima_{self.currency_pair}.pkl"
        with open(model_file, "wb") as file:
            pickle.dump(pipeline, file)

        # load the model back from the file that was just written
        with open(model_file, "rb") as file:
            model = pickle.load(file)

        # make the forecasts; with return_conf_int=True the result is a
        # (point forecasts, interval bounds) pair
        forecast_values, conf_int = model.predict(n_periods=forecast_horizon,
                                                  return_conf_int=True)
        print("ARIMA forecast ... complete")

        # One record per period; "error" is half the width of the interval.
        collated_results = DataFrame.from_records([{
            "forecast": value,
            "error": abs(bounds[0] - bounds[1]) / 2,
            "forecast_lower": bounds[0],
            "forecast_upper": bounds[1],
        } for value, bounds in zip(forecast_values, conf_int)])

        self.forecasts = collated_results["forecast"]
        self.errors = collated_results["error"]
        self.forecasts_lower = collated_results["forecast_lower"]
        self.forecasts_upper = collated_results["forecast_upper"]
        self.forecasts_raw = collated_results

        collated_results.to_csv(
            f"output/{self.currency_pair}__{self.model_name.lower()}__{forecast_horizon}__forecasts.csv"
        )
        print(collated_results)