Example #1
def test_pipeline_behavior():
    wineind = load_wineind()
    train, test = wineind[:125], wineind[125:]

    pipeline = Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arima", AutoARIMA(seasonal=False, stepwise=True,
                            suppress_warnings=True,
                            maxiter=3, error_action='ignore'))
    ])

    # Quick assertions on indexing
    assert len(pipeline) == 2

    pipeline.fit(train)
    preds = pipeline.predict(5)
    assert preds.shape[0] == 5

    assert pipeline._final_estimator.model_.fit_with_exog_

    # Assert that when the n_periods kwarg is set manually and incorrectly for
    # the fourier transformer, we get a ValueError
    kwargs = {
        "fourier__n_periods": 10
    }

    with pytest.raises(ValueError) as ve:
        pipeline.predict(3, **kwargs)
    assert "'n_periods'" in pytest_error_str(ve)

    # Assert that we can update the model
    pipeline.update(test, maxiter=5)

    # And that the fourier transformer was updated properly...
    assert pipeline.steps_[0][1].n_ == wineind.shape[0]
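
For reference, a minimal import header that would make this test self-contained. The pmdarima import paths are taken from the other examples on this page; the location of the pytest_error_str helper (assumed here to be pmdarima.compat.pytest) is an assumption based on how it is used.

# Hedged sketch of the imports this test relies on.
import pytest

from pmdarima.arima import AutoARIMA
from pmdarima.compat.pytest import pytest_error_str  # assumed helper location
from pmdarima.datasets import load_wineind
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import FourierFeaturizer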
Example #2
    def test_non_unique_names(self):
        # Will fail since the same name repeated twice
        with pytest.raises(ValueError) as ve:
            Pipeline([("stage", BoxCoxEndogTransformer()),
                      ("stage", ARIMA(order=(0, 0, 0)))])

        assert "not unique" in pytest_error_str(ve)
Example #3
    def test_names_in_params(self):
        # Will fail because 'steps' is a param of Pipeline
        with pytest.raises(ValueError) as ve:
            Pipeline([("steps", BoxCoxEndogTransformer()),
                      ("stage", ARIMA(order=(0, 0, 0)))])

        assert "names conflict" in pytest_error_str(ve)
Example #4
    def test_names_double_underscore(self):
        # Will fail since the "__" is reserved for parameter names
        with pytest.raises(ValueError) as ve:
            Pipeline([("stage__1", BoxCoxEndogTransformer()),
                      ("stage", ARIMA(order=(0, 0, 0)))])

        assert "must not contain __" in pytest_error_str(ve)
Example #5
    def test_non_transformer_in_steps(self):
        # Will fail since the first stage is not a transformer
        with pytest.raises(TypeError) as ve:
            Pipeline([
                ("stage1", (lambda *args, **kwargs: None)),  # Fail
                ("stage2", AutoARIMA())
            ])

        assert "instances of BaseTransformer" in pytest_error_str(ve)
Example #6
def basic_pipeline(data):
    pipeline = Pipeline(steps=[
        ("fourier", FourierFeaturizer(k=3, m=7)),
        ("arima", AutoARIMA(out_of_sample_size=60)),
    ])
    return GroupedPmdarima(pipeline).fit(
        data.df,
        data.key_columns,
        "y",
        "ds",
    )
Example #7
def model(data):
    pipeline = Pipeline(
        steps=[("arima", AutoARIMA(out_of_sample_size=60, max_order=7))]
    )
    arima = GroupedPmdarima(model_template=pipeline).fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
        silence_warnings=True,
    )
    return arima
Example #8
def pipeline_override_d(data):
    pipeline = Pipeline(steps=[("arima", AutoARIMA(out_of_sample_size=30))])
    util = PmdarimaAnalyzer(df=data.df,
                            group_key_columns=data.key_columns,
                            y_col="y",
                            datetime_col="ds")
    ndiffs = util.calculate_ndiffs(alpha=0.2, test="kpss", max_d=7)
    nsdiffs = util.calculate_nsdiffs(m=7, test="ocsb", max_D=7)
    return GroupedPmdarima(pipeline).fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
        ndiffs=ndiffs,
        nsdiffs=nsdiffs,
        silence_warnings=True,
    )
Example #9
def test_order_does_not_matter_with_date_transformer():
    train_y_dates, test_y_dates, train_X_dates, test_X_dates = \
        train_test_split(y_dates, X_dates, test_size=15)

    pipeline_a = Pipeline([
        ('fourier', FourierFeaturizer(m=3, prefix="FOURIER")),
        ('dates', DateFeaturizer(column_name="date", prefix="DATE")),
        ("arima",
         AutoARIMA(seasonal=False,
                   stepwise=True,
                   suppress_warnings=True,
                   maxiter=3,
                   error_action='ignore'))
    ]).fit(train_y_dates, train_X_dates)
    Xt_a = pipeline_a.transform(exogenous=test_X_dates)
    pred_a = pipeline_a.predict(exogenous=test_X_dates)

    pipeline_b = Pipeline([
        ('dates', DateFeaturizer(column_name="date", prefix="DATE")),
        ('fourier', FourierFeaturizer(m=3, prefix="FOURIER")),
        ("arima",
         AutoARIMA(seasonal=False,
                   stepwise=True,
                   suppress_warnings=True,
                   maxiter=3,
                   error_action='ignore'))
    ]).fit(train_y_dates, train_X_dates)
    Xt_b = pipeline_b.transform(exogenous=test_X_dates)
    pred_b = pipeline_b.predict(exogenous=test_X_dates)

    # feature order differs: A puts FOURIER terms first, B puts DATE features first
    assert pipeline_a.x_feats_[0].startswith("FOURIER")
    assert pipeline_a.x_feats_[-1].startswith("DATE")

    assert pipeline_b.x_feats_[0].startswith("DATE")
    assert pipeline_b.x_feats_[-1].startswith("FOURIER")

    # columns should be identical once ordered appropriately
    assert Xt_a.equals(Xt_b[pipeline_a.x_feats_])

    # forecasts should be identical
    assert_array_almost_equal(pred_a, pred_b, decimal=3)
Example #10
    def test_bad_last_stage(self, stages):
        # Will fail since the last stage is not an estimator
        with pytest.raises(TypeError) as ve:
            Pipeline(stages)

        assert "Last step of Pipeline should be" in pytest_error_str(ve)
Example #11
             ("stage2", FourierFeaturizer(m=12))]
        ]
    )
    def test_bad_last_stage(self, stages):
        # Will fail since the last stage is not an estimator
        with pytest.raises(TypeError) as ve:
            Pipeline(stages)

        assert "Last step of Pipeline should be" in pytest_error_str(ve)


@pytest.mark.parametrize(
    'pipe,kwargs,expected', [
        pytest.param(
            Pipeline([
                ("boxcox", BoxCoxEndogTransformer()),
                ("arima", AutoARIMA())
            ]),
            {},
            {"boxcox": {}, "arima": {}}
        ),

        pytest.param(
            Pipeline([
                ("boxcox", BoxCoxEndogTransformer()),
                ("arima", AutoARIMA())
            ]),
            {"boxcox__lmdba1": 0.001},
            {"boxcox": {"lmdba1": 0.001}, "arima": {}}
        ),
    ]
)
Example #12
@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=120, step=12, h=1),
])
@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1), maxiter=2, simple_differencing=True),
        ARIMA(order=(1, 1, 2),
              seasonal_order=(0, 1, 1, 12),
              maxiter=2,
              simple_differencing=True,
              suppress_warnings=True),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", ARIMA(order=(2, 1, 0),
                            maxiter=2,
                            simple_differencing=True))
        ])
    ]
)
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('X', [None, exogenous])
def test_cv_scores(cv, est, verbose, X):
    scores = cross_val_score(
        est, y, X=X, scoring='mean_squared_error',
        cv=cv, verbose=verbose)
    assert isinstance(scores, np.ndarray)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=12, h=12),
Example #13
        series_count=3,
        series_size=365 * 3,
        start_dt="2019-01-01",
        days_period=1,
    )

    training_data = generated_data.df
    group_key_columns = generated_data.key_columns

    pipeline = Pipeline(
        steps=[
            (
                "arima",
                AutoARIMA(
                    max_order=14,
                    out_of_sample_size=90,
                    suppress_warnings=True,
                    error_action="ignore",
                ),
            )
        ]
    )

    diff_analyzer = PmdarimaAnalyzer(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
    )
    ndiff = diff_analyzer.calculate_ndiffs(
        alpha=0.05,
Example #14
            prefix = valfiles_oi[ind].split(
                '_')[0] + '-validation-{}d-'.format(pred)
            #滑动窗口
            for i in range(past + pred - 1, len(price)):
                print(
                    '=========== Now training on dataset {}, target point: {} ==========='.format(
                        valfiles_oi[ind].split('_')[0],
                        val_3m.index[(i - (past + pred) + 1)]))
                sample = price[(i - (past + pred) + 1):(i + 1)]
                train, test = train_test_split(sample, train_size=past)
                pipeline = Pipeline([
                    # ('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)),  # lmbda2 avoids negative values
                    ('arima',
                     pm.AutoARIMA(seasonal=True,
                                  m=1,
                                  suppress_warnings=True,
                                  trace=True,
                                  error_action="ignore"))
                ])

                pipeline.fit(train)
                pred_result = pipeline.predict(pred)
                print('pred_result is : ', pred_result)
                print(
                    '==================== One training iteration finished ====================\n\n\n'
                )

                val_index = prefix + val_3m.index[(i - (past + pred) + 1)]
                if use_diff:
                    val_label = 1 if pred_result[-1] > 0 else 0
Example #15
    generated_data = generate_example_data(
        column_count=3,
        series_count=2,
        series_size=365 * 3,
        start_dt="2019-01-01",
        days_period=1,
    )

    training_data = generated_data.df
    group_key_columns = generated_data.key_columns

    pipeline_obj = Pipeline(
        steps=[
            (
                "box",
                BoxCoxEndogTransformer(lmbda2=0.4, neg_action="raise", floor=1e-12),
            ),
            ("arima", AutoARIMA(out_of_sample_size=60, max_p=4, max_q=4, max_d=4)),
        ]
    )
    pipeline_arima = GroupedPmdarima(model_template=pipeline_obj).fit(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
        silence_warnings=True,
    )

    # Save to local directory
    save_dir = "/tmp/group_pmdarima/pipeline.gpmd"
    pipeline_arima.save(save_dir)
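
A saved GroupedPmdarima artifact can later be restored for reuse. The sketch below assumes that GroupedPmdarima is importable from diviner and exposes a load() counterpart to the save() call above; treat both as assumptions to verify against the diviner documentation.

# Hedged sketch: restore the pipeline saved above (API assumed, not shown here)
from diviner import GroupedPmdarima

loaded_model = GroupedPmdarima.load("/tmp/group_pmdarima/pipeline.gpmd")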
Example #16
            # Two transformers
            [("stage1", BoxCoxEndogTransformer()),
             ("stage2", FourierFeaturizer(m=12))]
        ])
    def test_bad_last_stage(self, stages):
        # Will fail since the last stage is not an estimator
        with pytest.raises(TypeError) as ve:
            Pipeline(stages)

        assert "Last step of Pipeline should be" in pytest_error_str(ve)


@pytest.mark.parametrize('pipe,kwargs,expected', [
    pytest.param(
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("arima", AutoARIMA())]), {}, {
                      "boxcox": {},
                      "arima": {}
                  }),
    pytest.param(
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("arima", AutoARIMA())]), {"boxcox__lmdba1": 0.001}, {
                      "boxcox": {
                          "lmdba1": 0.001
                      },
                      "arima": {}
                  }),
])
def test_get_kwargs(pipe, kwargs, expected):
    # Test we get the kwargs we expect
    kw = pipe._get_kwargs(**kwargs)
Example #17
from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer
import pytest

y = load_wineind()


@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)),
        AutoARIMA(seasonal=False, maxiter=3),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", AutoARIMA(seasonal=False, stepwise=True,
                                suppress_warnings=True, d=1, max_p=2, max_q=0,
                                start_q=0, start_p=1,
                                maxiter=3, error_action='ignore'))
        ])
    ]
)
def test_clonable(est):
    # fit it, then clone it
    est.fit(y)
    est2 = clone(est)
    assert isinstance(est2, est.__class__)
    assert est is not est2
Example #18
import pmdarima as pm
from pmdarima.model_selection import train_test_split
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import BoxCoxEndogTransformer
import pickle

# Load/split your data
y = pm.datasets.load_sunspots()
train, test = train_test_split(y, train_size=2700)

# Define and fit your pipeline
pipeline = Pipeline([
    ('boxcox',
     BoxCoxEndogTransformer(lmbda2=1e-6)),  # lmbda2 avoids negative values
    ('arima',
     pm.AutoARIMA(seasonal=True, m=12, suppress_warnings=True, trace=True))
])

pipeline.fit(train)

# Serialize your model just like you would in scikit:
with open('model.pkl', 'wb') as pkl:
    pickle.dump(pipeline, pkl)

# Load it and make predictions seamlessly:
with open('model.pkl', 'rb') as pkl:
    mod = pickle.load(pkl)
    print(mod.predict(15))
# [25.20580375 25.05573898 24.4263037  23.56766793 22.67463049 21.82231043
# 21.04061069 20.33693017 19.70906027 19.1509862  18.6555793  18.21577243
# 17.8250318  17.47750614 17.16803394]
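
If new observations arrive after serialization, the loaded pipeline can be refreshed in place before forecasting again. This is a minimal sketch reusing the update() method shown in Example #1 and the test split created above.

# Refresh the deserialized pipeline with the held-out observations,
# then forecast 15 steps beyond the updated data.
with open('model.pkl', 'rb') as pkl:
    mod = pickle.load(pkl)

mod.update(test)
print(mod.predict(15))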
Example #19
    def forecast(self, forecast_horizon: int = 96):
        super().forecast(forecast_horizon)

        print(
            "Running ARIMA forecast for currency pair: {} using forecast horizon: {}".format(
                self.currency_pair.upper(), forecast_horizon))
        print("Dataset: ", self.currency_pair.upper())
        print(self.training_data.head(5))
        print(".....\t.........\t...")
        print(self.training_data.tail(5))

        # define and fit the pipeline/model
        pipeline = Pipeline([('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)),
                             ('arima',
                              pm.AutoARIMA(start_p=1,
                                           start_q=1,
                                           max_p=3,
                                           max_q=3,
                                           d=1,
                                           D=1,
                                           start_P=0,
                                           error_action='ignore',
                                           suppress_warnings=True,
                                           stepwise=True,
                                           seasonal=True,
                                           m=12,
                                           trace=True))])
        pipeline.fit(self.training_data['close'])
        # model = pm.auto_arima(self.training_data["close"], seasonal=True, m=12)

        # serialize model
        model_file = f"intermediates/arima_{self.currency_pair}.pkl"
        with open(model_file, "wb") as file:
            pickle.dump(pipeline, file)

        # load model and make predictions seamlessly
        with open(model_file, "rb") as file:
            model = pickle.load(file)

        # make the forecasts
        predictions = model.predict(n_periods=forecast_horizon,
                                    return_conf_int=True)
        print("ARIMA forecast ... complete")
        collated_results = DataFrame.from_records([{
            "forecast": value,
            "error": abs(bounds[0] - bounds[1]) / 2,
            "forecast_lower": bounds[0],
            "forecast_upper": bounds[1],
        } for value, bounds in zip(predictions[0], predictions[1])])

        self.forecasts = collated_results["forecast"]
        self.errors = collated_results["error"]
        self.forecasts_lower = collated_results["forecast_lower"]
        self.forecasts_upper = collated_results["forecast_upper"]
        self.forecasts_raw = collated_results

        collated_results.to_csv(
            f"output/{self.currency_pair}__{self.model_name.lower()}__{forecast_horizon}__forecasts.csv"
        )
        print(collated_results)
Example #20
import numpy as np
from unittest import mock

y = load_wineind()
exogenous = np.random.RandomState(1).rand(y.shape[0], 2)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=150, step=12, h=1),
])
@pytest.mark.parametrize('est', [
    ARIMA(order=(2, 1, 1)),
    ARIMA(
        order=(1, 1, 2), seasonal_order=(0, 1, 1, 12), suppress_warnings=True),
    Pipeline([("fourier", FourierFeaturizer(m=12)),
              ("arima", ARIMA(order=(2, 1, 0), maxiter=3))])
])
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('exog', [None, exogenous])
def test_cv_scores(cv, est, verbose, exog):
    scores = cross_val_score(est,
                             y,
                             exogenous=exog,
                             scoring='mean_squared_error',
                             cv=cv,
                             verbose=verbose)
    assert isinstance(scores, np.ndarray)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=12, h=12),
Example #21
def train_arima_model(data_train, date_init, date_fin, op_red, type_day,
                      transform='decompose-Fourier', type_decompose='additive',
                      n_decompose=1, n_coeff_fourier=4, filter_decompose=None):
    num_cluster = data_train.name
    data_train = np.array(data_train)[~np.isnan(np.array(data_train))]
    type_model = 'arima'

    if transform in ('decompose-Fourier', 'decompose-Fourier-log'):
        print('n_decompose: ', n_decompose, 'n_coeff_fourier: ', n_coeff_fourier)
        forecast_seasonal, trend_residual, n_diffs, periods_decompose, m_f, k_f = \
            get_transform_model(data_train, transform=transform,
                                type_decompose=type_decompose,
                                n_decompose=n_decompose,
                                n_coeff_fourier=n_coeff_fourier)
        pipeline_trend_residual = Pipeline([
            ('fourier', ppc.FourierFeaturizer(m=m_f, k=k_f)),
            ("model", pm.AutoARIMA(d=n_diffs, seasonal=False, trace=True,
                                   error_action='ignore', maxiter=30,
                                   max_p=4, max_q=4, suppress_warnings=True,
                                   with_intercept=True))])
        print('\t\t\t training model...')
        pipeline_trend_residual.fit(trend_residual)
        print(pipeline_trend_residual.summary())
        # aic_model = pipeline_trend_residual.steps[-1][1].model_.aic()
        print('\t\t\t saving model...')
        save_model_dir(pipeline_trend_residual, transform, num_cluster, op_red,
                       type_day, type_model, date_init, date_fin,
                       periods_decompose, str(n_decompose), type_decompose)
        print('\t\t\t finished saving model...')
    elif transform == 'Fourier':
        n_diffs, m_f, k_f = get_transform_model(data_train, transform=transform,
                                                n_coeff_fourier=n_coeff_fourier)
        pipeline = Pipeline([
            ('fourier', ppc.FourierFeaturizer(m=m_f, k=k_f)),
            ("model", pm.AutoARIMA(d=n_diffs, seasonal=False, trace=True,
                                   error_action='ignore', maxiter=30,
                                   max_p=4, max_q=4, suppress_warnings=True,
                                   with_intercept=True))])
        pipeline.fit(data_train)
        save_model_dir(pipeline, transform, num_cluster, op_red, type_day,
                       type_model, date_init, date_fin)
    elif transform == 'decompose':
        forecast_seasonal, trend_residual, n_diffs, ns_diffs, periods_decompose, m_f = \
            get_transform_model(data_train, transform=transform,
                                type_decompose=type_decompose,
                                n_decompose=n_decompose)
        pipeline_trend_residual = Pipeline([
            ("model", pm.AutoARIMA(d=n_diffs, D=ns_diffs, seasonal=True, m=m_f,
                                   trace=True, error_action='ignore', maxiter=30,
                                   max_p=4, max_q=4, suppress_warnings=True,
                                   with_intercept=True))])
        pipeline_trend_residual.fit(trend_residual)
        save_model_dir(pipeline_trend_residual, transform, num_cluster, op_red,
                       type_day, type_model, date_init, date_fin,
                       periods_decompose, str(n_decompose), type_decompose)
    elif transform == 'normal':
        n_diffs, ns_diffs, m_f = get_transform_model(data_train, transform=transform)
        pipeline = Pipeline([
            ("model", pm.AutoARIMA(d=n_diffs, D=ns_diffs, seasonal=True, m=m_f,
                                   trace=True, error_action='ignore', maxiter=30,
                                   max_p=4, max_q=4, suppress_warnings=True,
                                   with_intercept=True))])
        pipeline.fit(data_train)
        save_model_dir(pipeline, transform, num_cluster, op_red, type_day,
                       type_model, date_init, date_fin)
    else:
        raise ValueError('invalid transform value: {}.'.format(transform))
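
A hedged usage sketch for the function above. It assumes the project-specific helpers get_transform_model and save_model_dir are importable in the calling module, and it passes a named pandas Series because train_arima_model reads data_train.name as the cluster identifier; every value below is illustrative.

import numpy as np
import pandas as pd

# Illustrative daily series for one cluster; the Series name becomes num_cluster.
rng = np.random.RandomState(42)
series = pd.Series(100 + 10 * rng.rand(365), name=3)

train_arima_model(series,
                  date_init='2020-01-01',
                  date_fin='2020-12-31',
                  op_red='OR1',        # illustrative operator code
                  type_day='weekday',  # illustrative day type
                  transform='Fourier',
                  n_coeff_fourier=4)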