예제 #1
0
def test_same():
    """Box-Cox with ``lmbda=0`` must match the plain log transform."""
    endog = [1, 2, 3]

    boxcox = BoxCoxEndogTransformer(lmbda=0)
    log = LogEndogTransformer()

    # Both transformers return a ``(y, X)`` pair; only ``y`` is compared.
    boxcox_out = boxcox.fit_transform(endog)[0]
    log_out = log.fit_transform(endog)[0]

    assert_array_almost_equal(log_out, boxcox_out)
예제 #2
0
    def test_invertible_when_lam2(self):
        """Round-tripping with ``lmbda2`` set should reproduce the input."""
        original = self.y
        transformer = BoxCoxEndogTransformer(lmbda=2., lmbda2=2.)

        transformed = transformer.fit_transform(original)[0]

        # The inverse is expected to give back (almost) exactly what went in.
        recovered = transformer.inverse_transform(transformed)[0]
        assert_array_almost_equal(original, recovered)
예제 #3
0
    def test_no_warning_on_ignore(self):
        """With ``neg_action="ignore"``, inverting must not reproduce ``y``."""
        original = self.y
        transformer = BoxCoxEndogTransformer(lmbda=2., neg_action="ignore")

        transformed = transformer.fit_transform(original)[0]

        # The round trip is expected to be lossy for this input.
        recovered = transformer.inverse_transform(transformed)[0]
        round_trip_matches = np.allclose(recovered, original)
        assert not round_trip_matches
예제 #4
0
class TestIllegal:
    """Pipeline construction must reject malformed ``steps`` definitions."""

    def test_non_unique_names(self):
        """Two stages sharing the same name raise ``ValueError``."""
        duplicated = [
            ("stage", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ]
        with pytest.raises(ValueError) as exc_info:
            Pipeline(duplicated)

        message = pytest_error_str(exc_info)
        assert "not unique" in message

    def test_names_in_params(self):
        """A stage named ``steps`` clashes with Pipeline's own parameter."""
        clashing = [
            ("steps", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ]
        with pytest.raises(ValueError) as exc_info:
            Pipeline(clashing)

        message = pytest_error_str(exc_info)
        assert "names conflict" in message

    def test_names_double_underscore(self):
        """Stage names may not contain ``__`` (reserved for parameters)."""
        reserved = [
            ("stage__1", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ]
        with pytest.raises(ValueError) as exc_info:
            Pipeline(reserved)

        message = pytest_error_str(exc_info)
        assert "must not contain __" in message

    def test_non_transformer_in_steps(self):
        """A leading stage that is not a transformer raises ``TypeError``."""
        not_a_transformer = (lambda *args, **kwargs: None)
        invalid = [
            ("stage1", not_a_transformer),  # the offending stage
            ("stage2", AutoARIMA()),
        ]
        with pytest.raises(TypeError) as exc_info:
            Pipeline(invalid)

        message = pytest_error_str(exc_info)
        assert "instances of BaseTransformer" in message

    @pytest.mark.parametrize('stages', [
        # A lone transformer and nothing else
        [("stage1", BoxCoxEndogTransformer())],

        # Two transformers back to back
        [("stage1", BoxCoxEndogTransformer()),
         ("stage2", FourierFeaturizer(m=12))],
    ])
    def test_bad_last_stage(self, stages):
        """A pipeline that does not end in an estimator raises ``TypeError``."""
        with pytest.raises(TypeError) as exc_info:
            Pipeline(stages)

        message = pytest_error_str(exc_info)
        assert "Last step of Pipeline should be" in message
예제 #5
0
    def test_expected_warning(self):
        """``neg_action="warn"`` emits a ``UserWarning`` while fitting."""
        original = self.y
        transformer = BoxCoxEndogTransformer(lmbda=2., neg_action="warn")

        with pytest.warns(UserWarning):
            transformed = transformer.fit_transform(original)[0]

        # The round trip is expected to be lossy for this input.
        recovered = transformer.inverse_transform(transformed)[0]
        round_trip_matches = np.allclose(recovered, original)
        assert not round_trip_matches
예제 #6
0
def test_invertible(X):
    """Box-Cox round trip recovers ``loggamma`` and leaves ``X`` untouched."""
    transformer = BoxCoxEndogTransformer()
    y_fwd, X_fwd = transformer.fit_transform(loggamma, X=X)
    y_back, X_back = transformer.inverse_transform(y_fwd, X=X_fwd)

    assert_array_almost_equal(loggamma, y_back)

    # The exogenous data must pass through both directions unchanged.
    if X is not None:
        for passed_through in (X_fwd, X_back):
            assert_array_almost_equal(X, passed_through)
        return

    assert X_fwd is None and X_back is None
예제 #7
0
def test_invertible(exog):
    """Box-Cox round trip recovers ``loggamma`` and leaves ``exog`` untouched."""
    transformer = BoxCoxEndogTransformer()
    y_fwd, exog_fwd = transformer.fit_transform(loggamma, exogenous=exog)
    y_back, exog_back = transformer.inverse_transform(
        y_fwd, exogenous=exog_fwd)

    assert_array_almost_equal(loggamma, y_back)

    # The exogenous data must pass through both directions unchanged.
    if exog is not None:
        for passed_through in (exog_fwd, exog_back):
            assert_array_almost_equal(exog, passed_through)
        return

    assert exog_fwd is None and exog_back is None
예제 #8
0
    def test_names_double_underscore(self):
        """Stage names may not contain ``__`` (reserved for parameters)."""
        reserved = [
            ("stage__1", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ]
        with pytest.raises(ValueError) as exc_info:
            Pipeline(reserved)

        message = pytest_error_str(exc_info)
        assert "must not contain __" in message
예제 #9
0
    def test_names_in_params(self):
        """A stage named ``steps`` clashes with Pipeline's own parameter."""
        clashing = [
            ("steps", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ]
        with pytest.raises(ValueError) as exc_info:
            Pipeline(clashing)

        message = pytest_error_str(exc_info)
        assert "names conflict" in message
예제 #10
0
    def test_non_unique_names(self):
        """Two stages sharing the same name raise ``ValueError``."""
        duplicated = [
            ("stage", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ]
        with pytest.raises(ValueError) as exc_info:
            Pipeline(duplicated)

        message = pytest_error_str(exc_info)
        assert "not unique" in message
예제 #11
0
def test_pipeline_behavior():
    """Exercise fit, predict, kwarg validation and update on one pipeline."""
    fourier_stage = ("fourier", FourierFeaturizer(m=12))
    boxcox_stage = ("boxcox", BoxCoxEndogTransformer())
    arima_stage = (
        "arima",
        AutoARIMA(
            seasonal=False,
            stepwise=True,
            suppress_warnings=True,
            d=1,
            max_p=2,
            max_q=0,
            start_q=0,
            start_p=1,
            maxiter=3,
            error_action='ignore',
        ),
    )
    pipeline = Pipeline([fourier_stage, boxcox_stage, arima_stage])

    # The pipeline reports one entry per stage
    assert len(pipeline) == 3

    pipeline.fit(train)
    forecast = pipeline.predict(5)
    assert forecast.shape[0] == 5

    assert pipeline._final_estimator.model_.fit_with_exog_

    # Manually overriding the fourier stage's n_periods with a value that
    # disagrees with the requested horizon must be rejected.
    with pytest.raises(ValueError) as exc_info:
        pipeline.predict(3, fourier__n_periods=10)
    message = pytest_error_str(exc_info)
    assert "'n_periods'" in message

    # Updating the model with new observations must work...
    pipeline.update(test, maxiter=5)

    # ...and the fourier stage must have been updated along with it.
    fourier_step = pipeline.steps_[0][1]
    assert fourier_step.n_ == wineind.shape[0]
예제 #12
0
             ("stage2", FourierFeaturizer(m=12))]
        ]
    )
    def test_bad_last_stage(self, stages):
        """A pipeline that does not end in an estimator raises ``TypeError``."""
        with pytest.raises(TypeError) as exc_info:
            Pipeline(stages)

        message = pytest_error_str(exc_info)
        assert "Last step of Pipeline should be" in message


@pytest.mark.parametrize(
    'pipe,kwargs,expected', [
        pytest.param(
            Pipeline([
                ("boxcox", BoxCoxEndogTransformer()),
                ("arima", AutoARIMA())
            ]),
            {},
            {"boxcox": {}, "arima": {}}
        ),

        pytest.param(
            Pipeline([
                ("boxcox", BoxCoxEndogTransformer()),
                ("arima", AutoARIMA())
            ]),
            {"boxcox__lmdba1": 0.001},
            {"boxcox": {"lmdba1": 0.001}, "arima": {}}
        ),
    ]
예제 #13
0
 def test_expected_error(self):
     """Fitting ``self.y`` with ``lmbda=2.`` is expected to raise ``ValueError``."""
     transformer = BoxCoxEndogTransformer(lmbda=2.)
     endog = self.y

     with pytest.raises(ValueError):
         transformer.fit_transform(endog)
import pmdarima as pm
from pmdarima.model_selection import train_test_split
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import BoxCoxEndogTransformer
import pickle

# Load the sunspots data and split it into train/test portions
y = pm.datasets.load_sunspots()
train, test = train_test_split(y, train_size=2700)

# Describe each pipeline stage, then assemble and fit the pipeline
boxcox_stage = (
    'boxcox',
    BoxCoxEndogTransformer(lmbda2=1e-6),  # lmbda2 avoids negative values
)
arima_stage = (
    'arima',
    pm.AutoARIMA(seasonal=True, m=12, suppress_warnings=True, trace=True),
)
pipeline = Pipeline([boxcox_stage, arima_stage])
pipeline.fit(train)

# The fitted pipeline pickles just like a scikit-learn estimator would:
with open('model.pkl', 'wb') as pkl:
    pickle.dump(pipeline, pkl)

# Reload it and forecast straight from the restored object:
with open('model.pkl', 'rb') as pkl:
    mod = pickle.load(pkl)
print(mod.predict(15))
# [25.20580375 25.05573898 24.4263037  23.56766793 22.67463049 21.82231043
# 21.04061069 20.33693017 19.70906027 19.1509862  18.6555793  18.21577243
# 17.8250318  17.47750614 17.16803394]
예제 #15
0
def _plot_forecast(observed, forecast, conf_int=None):
    """Plot the observed series, a forecast, and an optional interval band."""
    ax = observed.plot(label='Observed', figsize=(14, 4), linewidth=3)
    forecast.plot(ax=ax, label='Forecasting', linewidth=3)
    if conf_int is not None:
        ax.fill_between(forecast.index,
                        conf_int.iloc[:, 0],
                        conf_int.iloc[:, 1],
                        color='k',
                        alpha=.25)
    ax.set_xlabel('Date')
    ax.set_ylabel('Furniture Sales')
    plt.legend()
    plt.show()


def ARIMA(x, n_periods=14, normalize=False):
    """Pick a seasonal ARIMA model on a hold-out window, then forecast ahead.

    The last ``n_periods`` observations of ``x`` are held out. Four
    ``pm.auto_arima`` searches are run (``d`` and ``D`` each in ``{1, 2}``)
    and the model with the lowest hold-out MAE is kept. That model is scored
    (R2, RMSE, MAE, SMAPE), plotted, updated with the hold-out data and used
    to forecast ``n_periods`` daily steps past the end of ``x``.

    Parameters
    ----------
    x : pandas.Series
        Time series with timestamps as its index.
    n_periods : int, default 14
        Size of the hold-out window and of the final forecast horizon.
    normalize : bool, default False
        When truthy, a Box-Cox transform fitted on the training slice is
        applied before modelling, and predictions and confidence bounds are
        mapped back to the original scale.

    Returns
    -------
    The best fitted model, after it has been updated with the hold-out data.

    Raises
    ------
    RuntimeError
        If no candidate produced a comparable (non-NaN) hold-out MAE.

    Side effects: prints the metrics, shows two matplotlib figures and writes
    ``forecasting.csv`` to the current working directory.
    """
    # Split data. The first 60 observations are excluded from training
    # (the reason is not visible in this function).
    train = x[60:-n_periods]
    test = x[-n_periods:]
    # Metrics are reported on the original scale, so keep the raw hold-out.
    test_raw = test

    # Box-Cox transformation
    boxcox = None
    if normalize:
        boxcox = BoxCoxEndogTransformer(lmbda2=1e-6).fit(train)
        train, _ = boxcox.transform(train)
        test, _ = boxcox.transform(test)

    best_model = None
    best_score = np.inf  # np.infty was removed in NumPy 2.0

    # Train SARIMA candidates over the differencing orders
    for d in range(1, 3):
        for seasonal_d in range(1, 3):
            candidate = pm.auto_arima(train,
                                      m=7,
                                      max_p=3,
                                      max_q=3,
                                      max_P=3,
                                      max_Q=3,
                                      d=d,
                                      D=seasonal_d,
                                      max_order=12,
                                      stepwise=True,
                                      out_of_sample_size=n_periods,
                                      scoring='mae',
                                      information_criterion='oob',
                                      error_action='ignore',
                                      trace=False,
                                      suppress_warnings=True)

            # Model selection happens on the (possibly transformed) scale,
            # where ``test`` and the prediction are directly comparable.
            candidate_pred = np.asarray(candidate.predict(n_periods=n_periods))
            mae = mean_absolute_error(test, candidate_pred)
            if mae < best_score:
                best_score = mae
                best_model = candidate

    if best_model is None:
        raise RuntimeError('No ARIMA candidate produced a valid hold-out MAE')

    # Evaluation metrics, always against the untransformed hold-out
    pred = np.asarray(best_model.predict(n_periods=n_periods))
    if normalize:
        pred, _ = boxcox.inverse_transform(pred)

    # np.asarray drops any index carried by the prediction so the values are
    # assigned positionally rather than being reindexed to NaN.
    pred = pd.Series(np.asarray(pred), index=x.index[-n_periods:])
    r2 = round(r2_score(test_raw, pred), 2)
    RMSE = round(np.sqrt(mean_squared_error(test_raw, pred)), 2)
    MAE = round(mean_absolute_error(test_raw, pred), 2)
    SMAPE = round(smape(test_raw, pred), 2)
    print('R2:', r2)
    print('RMSE is {}'.format(RMSE))
    print('MAE is {}'.format(MAE))
    print('SMAPE is {}'.format(SMAPE))

    _plot_forecast(x, pred)

    # Forecasting
    start = x.index[-1] + pd.Timedelta(1, unit='D')
    end = start + pd.Timedelta(n_periods - 1, unit='D')
    time_range = pd.date_range(start, end, freq='D')

    # Feed the hold-out back into the *selected* model (on the scale it was
    # trained on) before forecasting past the end of the series.
    best_model.update(test)
    pred, confi = best_model.predict(n_periods=n_periods,
                                     return_conf_int=True)
    pred = np.asarray(pred)
    confi = np.asarray(confi)
    if normalize:
        # Bring the point forecast and both interval bounds back to the
        # original scale so they are consistent with each other.
        pred, _ = boxcox.inverse_transform(pred)
        lower, _ = boxcox.inverse_transform(confi[:, 0])
        upper, _ = boxcox.inverse_transform(confi[:, 1])
        confi = np.column_stack([lower, upper])

    pred = pd.Series(np.asarray(pred), name='Forecasting',
                     index=time_range).reset_index()

    pred['Order Date'] = pred['index'].dt.date.astype('datetime64[ns]')
    pred.set_index('Order Date', inplace=True)
    pred.drop('index', axis=1, inplace=True)

    # Share the forecast's index so the concat below lines up row by row.
    confi = pd.DataFrame(confi,
                         columns=['pred_lower', 'pred_upper'],
                         index=pred.index)

    # save results and plots
    pd.concat([pred, confi], axis=1).to_csv('forecasting.csv', index=False)
    _plot_forecast(x, pred, confi)

    return best_model
예제 #16
0
            # Two transformers
            [("stage1", BoxCoxEndogTransformer()),
             ("stage2", FourierFeaturizer(m=12))]
        ])
    def test_bad_last_stage(self, stages):
        """A pipeline that does not end in an estimator raises ``TypeError``."""
        with pytest.raises(TypeError) as exc_info:
            Pipeline(stages)

        message = pytest_error_str(exc_info)
        assert "Last step of Pipeline should be" in message


@pytest.mark.parametrize('pipe,kwargs,expected', [
    pytest.param(
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("arima", AutoARIMA())]), {}, {
                      "boxcox": {},
                      "arima": {}
                  }),
    pytest.param(
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("arima", AutoARIMA())]), {"boxcox__lmdba1": 0.001}, {
                      "boxcox": {
                          "lmdba1": 0.001
                      },
                      "arima": {}
                  }),
])
def test_get_kwargs(pipe, kwargs, expected):
    # Test we get the kwargs we expect
예제 #17
0
    generated_data = generate_example_data(
        column_count=3,
        series_count=2,
        series_size=365 * 3,
        start_dt="2019-01-01",
        days_period=1,
    )

    training_data = generated_data.df
    group_key_columns = generated_data.key_columns

    pipeline_obj = Pipeline(
        steps=[
            (
                "box",
                BoxCoxEndogTransformer(lmbda2=0.4, neg_action="raise", floor=1e-12),
            ),
            ("arima", AutoARIMA(out_of_sample_size=60, max_p=4, max_q=4, max_d=4)),
        ]
    )
    pipeline_arima = GroupedPmdarima(model_template=pipeline_obj).fit(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
        silence_warnings=True,
    )

    # Save to local directory
    save_dir = "/tmp/group_pmdarima/pipeline.gpmd"
    pipeline_arima.save(save_dir)
예제 #18
0
def test_invertible_when_lambda_is_0():
    """The ``lmbda=0.`` transform must round-trip back to the input."""
    endog = [1, 2, 3]
    transformer = BoxCoxEndogTransformer(lmbda=0.)

    transformed = transformer.fit_transform(endog)[0]
    recovered = transformer.inverse_transform(transformed)[0]

    assert_array_almost_equal(endog, recovered)
예제 #19
0
def test_value_error_on_neg_lambda():
    """A negative ``lmbda2`` is rejected with ``ValueError`` when fitting."""
    transformer = BoxCoxEndogTransformer(lmbda2=-4.)

    with pytest.raises(ValueError) as exc_info:
        transformer.fit_transform([1, 2, 3])

    message = pytest_error_str(exc_info)
    assert 'lmbda2 must be a non-negative' in message
예제 #20
0
    def forecast(self, forecast_horizon: int = 96):
        """Fit a Box-Cox + auto-ARIMA pipeline and store its forecasts.

        The pipeline is fitted on ``self.training_data['close']``, pickled to
        ``intermediates/arima_<currency_pair>.pkl``, reloaded from that file,
        and then used to predict ``forecast_horizon`` periods with confidence
        intervals.

        Parameters
        ----------
        forecast_horizon : int, default 96
            Number of periods to predict.

        Side effects
        ------------
        Sets ``self.forecasts``, ``self.errors``, ``self.forecasts_lower``,
        ``self.forecasts_upper`` and ``self.forecasts_raw``; writes the model
        pickle and a CSV under ``output/``; prints progress to stdout. Both
        the ``intermediates/`` and ``output/`` directories must already exist.
        """
        super().forecast(forecast_horizon)

        pair = self.currency_pair.upper()
        # Interpolate the values into the message; passing them as extra
        # print() arguments left the literal "{}" placeholders in the output.
        print(f"Running ARIMA forecast for Currency-pair: {pair} "
              f"using forecast horizon: {forecast_horizon}")
        print("Dataset: ", pair)
        print(self.training_data.head(5))
        print(".....\t.........\t...")
        print(self.training_data.tail(5))

        # define and fit the pipeline/model
        pipeline = Pipeline([('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)),
                             ('arima',
                              pm.AutoARIMA(start_p=1,
                                           start_q=1,
                                           max_p=3,
                                           max_q=3,
                                           d=1,
                                           D=1,
                                           start_P=0,
                                           error_action='ignore',
                                           suppress_warnings=True,
                                           stepwise=True,
                                           seasonal=True,
                                           m=12,
                                           trace=True))])
        pipeline.fit(self.training_data['close'])

        # serialize model
        model_file = f"intermediates/arima_{self.currency_pair}.pkl"
        with open(model_file, "wb") as file:
            pickle.dump(pipeline, file)

        # load the model back from the file that was just written
        with open(model_file, "rb") as file:
            model = pickle.load(file)

        # make the forecasts: point predictions plus (lower, upper) bounds
        point_forecasts, conf_int = model.predict(n_periods=forecast_horizon,
                                                  return_conf_int=True)
        print("ARIMA forecast ... complete")

        # "error" is half the width of the confidence interval
        collated_results = DataFrame.from_records([
            {
                "forecast": value,
                "error": abs(lower - upper) / 2,
                "forecast_lower": lower,
                "forecast_upper": upper,
            }
            for value, (lower, upper) in zip(point_forecasts, conf_int)
        ])

        self.forecasts = collated_results["forecast"]
        self.errors = collated_results["error"]
        self.forecasts_lower = collated_results["forecast_lower"]
        self.forecasts_upper = collated_results["forecast_upper"]
        self.forecasts_raw = collated_results

        collated_results.to_csv(
            f"output/{self.currency_pair}__{self.model_name.lower()}__{forecast_horizon}__forecasts.csv"
        )
        print(collated_results)