def test_pipeline_behavior():
    """Exercise len, fit, predict, kwarg validation and update on a pipeline."""
    wineind = load_wineind()
    split_at = 125
    train = wineind[:split_at]
    test = wineind[split_at:]

    fourier_stage = FourierFeaturizer(m=12)
    arima_stage = AutoARIMA(seasonal=False, stepwise=True,
                            suppress_warnings=True, maxiter=3,
                            error_action='ignore')
    pipeline = Pipeline([
        ("fourier", fourier_stage),
        ("arima", arima_stage),
    ])

    # len() of the pipeline counts its stages
    assert len(pipeline) == 2

    pipeline.fit(train)
    forecasts = pipeline.predict(5)
    assert forecasts.shape[0] == 5
    assert pipeline._final_estimator.model_.fit_with_exog_

    # A manually supplied (and incorrect) n_periods for the fourier stage
    # must be rejected with a ValueError that names the offending kwarg
    bad_kwargs = {"fourier__n_periods": 10}
    with pytest.raises(ValueError) as exc_info:
        pipeline.predict(3, **bad_kwargs)
    assert "'n_periods'" in pytest_error_str(exc_info)

    # Updating with the held-out samples must also update the fourier stage:
    # its sample counter should now equal the length of the full series
    pipeline.update(test, maxiter=5)
    first_transformer = pipeline.steps_[0][1]
    assert first_transformer.n_ == wineind.shape[0]
def test_non_unique_names(self):
    """Two stages sharing one name must raise a 'not unique' ValueError."""
    with pytest.raises(ValueError) as exc_info:
        Pipeline([
            ("stage", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ])

    message = pytest_error_str(exc_info)
    assert "not unique" in message
def test_names_in_params(self):
    """A stage named 'steps' clashes with a Pipeline param and must fail."""
    with pytest.raises(ValueError) as exc_info:
        Pipeline([
            ("steps", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ])

    message = pytest_error_str(exc_info)
    assert "names conflict" in message
def test_names_double_underscore(self):
    """Stage names containing the reserved '__' separator must be rejected."""
    with pytest.raises(ValueError) as exc_info:
        Pipeline([
            ("stage__1", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ])

    message = pytest_error_str(exc_info)
    assert "must not contain __" in message
def test_non_transformer_in_steps(self):
    """A non-transformer intermediate stage must raise a TypeError."""
    with pytest.raises(TypeError) as exc_info:
        Pipeline([
            # A bare callable is not a transformer, so this stage is invalid
            ("stage1", (lambda *args, **kwargs: None)),
            ("stage2", AutoARIMA()),
        ])

    message = pytest_error_str(exc_info)
    assert "instances of BaseTransformer" in message
def basic_pipeline(data):
    """Fit a grouped Fourier + AutoARIMA pipeline on ``data`` and return it.

    ``data`` must expose ``df`` and ``key_columns``; the target column is
    "y" and the datetime column is "ds".
    """
    steps = [
        ("fourier", FourierFeaturizer(k=3, m=7)),
        ("arima", AutoARIMA(out_of_sample_size=60)),
    ]
    grouped = GroupedPmdarima(Pipeline(steps=steps))
    return grouped.fit(data.df, data.key_columns, "y", "ds")
def model(data):
    """Fit a grouped single-stage AutoARIMA pipeline on ``data`` and return it.

    ``data`` must expose ``df`` and ``key_columns``; warnings are silenced
    during fitting.
    """
    template = Pipeline(
        steps=[("arima", AutoARIMA(out_of_sample_size=60, max_order=7))]
    )
    grouped = GroupedPmdarima(model_template=template)
    return grouped.fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
        silence_warnings=True,
    )
def pipeline_override_d(data):
    """Fit a grouped AutoARIMA pipeline with precomputed differencing terms.

    The ``ndiffs`` and ``nsdiffs`` values are computed up front by a
    ``PmdarimaAnalyzer`` over the same frame and handed to ``fit``.
    """
    pipeline = Pipeline(steps=[("arima", AutoARIMA(out_of_sample_size=30))])

    # The analyzer and the grouped model are pointed at the same columns
    frame_kwargs = dict(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
    )
    analyzer = PmdarimaAnalyzer(**frame_kwargs)
    ndiffs = analyzer.calculate_ndiffs(alpha=0.2, test="kpss", max_d=7)
    nsdiffs = analyzer.calculate_nsdiffs(m=7, test="ocsb", max_D=7)

    return GroupedPmdarima(pipeline).fit(
        ndiffs=ndiffs,
        nsdiffs=nsdiffs,
        silence_warnings=True,
        **frame_kwargs,
    )
def test_order_does_not_matter_with_date_transformer():
    """Swapping the two featurizer stages must not change the forecasts.

    Two pipelines are fit on the same data with the Fourier and date
    featurizers in opposite order. Their feature columns are expected to be
    ordered differently but hold the same values, and their predictions are
    expected to agree to three decimals.
    """
    train_y_dates, test_y_dates, train_X_dates, test_X_dates = \
        train_test_split(y_dates, X_dates, test_size=15)

    def _fit_pipeline(featurizer_steps):
        # Append the shared ARIMA stage to the given featurizers and fit
        steps = list(featurizer_steps) + [
            ("arima", AutoARIMA(seasonal=False, stepwise=True,
                                suppress_warnings=True, maxiter=3,
                                error_action='ignore')),
        ]
        return Pipeline(steps).fit(train_y_dates, train_X_dates)

    # Variant A: fourier first, then dates
    pipeline_a = _fit_pipeline([
        ('fourier', FourierFeaturizer(m=3, prefix="FOURIER")),
        ('dates', DateFeaturizer(column_name="date", prefix="DATE")),
    ])
    Xt_a = pipeline_a.transform(exogenous=test_X_dates)
    pred_a = pipeline_a.predict(exogenous=test_X_dates)

    # Variant B: dates first, then fourier
    pipeline_b = _fit_pipeline([
        ('dates', DateFeaturizer(column_name="date", prefix="DATE")),
        ('fourier', FourierFeaturizer(m=3, prefix="FOURIER")),
    ])
    Xt_b = pipeline_b.transform(exogenous=test_X_dates)
    pred_b = pipeline_b.predict(exogenous=test_X_dates)

    # The column order of the features should differ between A and B
    feats_a = pipeline_a.x_feats_
    feats_b = pipeline_b.x_feats_
    assert feats_a[0].startswith("FOURIER")
    assert feats_a[-1].startswith("DATE")
    assert feats_b[0].startswith("DATE")
    assert feats_b[-1].startswith("FOURIER")

    # Once B's columns are re-ordered like A's, the frames must be identical
    assert Xt_a.equals(Xt_b[feats_a])

    # Forecasts must match
    assert_array_almost_equal(pred_a, pred_b, decimal=3)
def test_bad_last_stage(self, stages):
    """A pipeline whose last stage is not an estimator must raise TypeError."""
    with pytest.raises(TypeError) as exc_info:
        Pipeline(stages)

    message = pytest_error_str(exc_info)
    assert "Last step of Pipeline should be" in message
("stage2", FourierFeaturizer(m=12))] ] ) def test_bad_last_stage(self, stages): # Will fail since the last stage is not an estimator with pytest.raises(TypeError) as ve: Pipeline(stages) assert "Last step of Pipeline should be" in pytest_error_str(ve) @pytest.mark.parametrize( 'pipe,kwargs,expected', [ pytest.param( Pipeline([ ("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA()) ]), {}, {"boxcox": {}, "arima": {}} ), pytest.param( Pipeline([ ("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA()) ]), {"boxcox__lmdba1": 0.001}, {"boxcox": {"lmdba1": 0.001}, "arima": {}} ), ] )
@pytest.mark.parametrize('cv', [ SlidingWindowForecastCV(window_size=100, step=24, h=1), RollingForecastCV(initial=120, step=12, h=1), ]) @pytest.mark.parametrize( 'est', [ ARIMA(order=(2, 1, 1), maxiter=2, simple_differencing=True), ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12), maxiter=2, simple_differencing=True, suppress_warnings=True), Pipeline([ ("fourier", FourierFeaturizer(m=12)), ("arima", ARIMA(order=(2, 1, 0), maxiter=2, simple_differencing=True)) ]) ] ) @pytest.mark.parametrize('verbose', [0, 2, 4]) @pytest.mark.parametrize('X', [None, exogenous]) def test_cv_scores(cv, est, verbose, X): scores = cross_val_score( est, y, X=X, scoring='mean_squared_error', cv=cv, verbose=verbose) assert isinstance(scores, np.ndarray) @pytest.mark.parametrize('cv', [ SlidingWindowForecastCV(window_size=100, step=12, h=12),
series_count=3, series_size=365 * 3, start_dt="2019-01-01", days_period=1, ) training_data = generated_data.df group_key_columns = generated_data.key_columns pipeline = Pipeline( steps=[ ( "arima", AutoARIMA( max_order=14, out_of_sample_size=90, suppress_warnings=True, error_action="ignore", ), ) ] ) diff_analyzer = PmdarimaAnalyzer( df=training_data, group_key_columns=group_key_columns, y_col="y", datetime_col="ds", ) ndiff = diff_analyzer.calculate_ndiffs( alpha=0.05,
prefix = valfiles_oi[ind].split( '_')[0] + '-validation-{}d-'.format(pred) #滑动窗口 for i in range(past + pred - 1, len(price)): print( '===========当前训练的是{}数据集,目标节点是{}=================='.format( valfiles_oi[ind].split('_')[0], val_3m.index[(i - (past + pred) + 1)])) sample = price[(i - (past + pred) + 1):(i + 1)] train, test = train_test_split(sample, train_size=past) pipeline = Pipeline([ # ('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)), # lmbda2 avoids negative values ('arima', pm.AutoARIMA(seasonal=True, m=1, suppress_warnings=True, trace=True, error_action="ignore")) ]) pipeline.fit(train) pred_result = pipeline.predict(pred) print('pred_result is : ', pred_result) print( '====================一次训练结束=============================\n\n\n' ) val_index = prefix + val_3m.index[(i - (past + pred) + 1)] if use_diff: val_label = 1 if pred_result[-1] > 0 else 0
# Build an example data set of daily series starting 2019-01-01
generated_data = generate_example_data(
    column_count=3,
    series_count=2,
    series_size=365 * 3,
    start_dt="2019-01-01",
    days_period=1,
)
training_data = generated_data.df
group_key_columns = generated_data.key_columns

# Box-Cox transform of the endogenous series, followed by AutoARIMA
pipeline_obj = Pipeline(
    steps=[
        (
            "box",
            BoxCoxEndogTransformer(lmbda2=0.4, neg_action="raise", floor=1e-12),
        ),
        ("arima", AutoARIMA(out_of_sample_size=60, max_p=4, max_q=4, max_d=4)),
    ]
)

# Fit the pipeline template against the grouped training frame
grouped_model = GroupedPmdarima(model_template=pipeline_obj)
pipeline_arima = grouped_model.fit(
    df=training_data,
    group_key_columns=group_key_columns,
    y_col="y",
    datetime_col="ds",
    silence_warnings=True,
)

# Save to local directory
save_dir = "/tmp/group_pmdarima/pipeline.gpmd"
pipeline_arima.save(save_dir)
# Two transformers [("stage1", BoxCoxEndogTransformer()), ("stage2", FourierFeaturizer(m=12))] ]) def test_bad_last_stage(self, stages): # Will fail since the last stage is not an estimator with pytest.raises(TypeError) as ve: Pipeline(stages) assert "Last step of Pipeline should be" in pytest_error_str(ve) @pytest.mark.parametrize('pipe,kwargs,expected', [ pytest.param( Pipeline([("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA())]), {}, { "boxcox": {}, "arima": {} }), pytest.param( Pipeline([("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA())]), {"boxcox__lmdba1": 0.001}, { "boxcox": { "lmdba1": 0.001 }, "arima": {} }), ]) def test_get_kwargs(pipe, kwargs, expected): # Test we get the kwargs we expect kw = pipe._get_kwargs(**kwargs)
from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer
import pytest

# Series shared by every parametrized case below
y = load_wineind()


@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)),
        AutoARIMA(seasonal=False, maxiter=3),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", AutoARIMA(seasonal=False, stepwise=True,
                                suppress_warnings=True, d=1, max_p=2,
                                max_q=0, start_q=0, start_p=1,
                                maxiter=3, error_action='ignore')),
        ]),
    ]
)
def test_clonable(est):
    """A fitted estimator can be cloned into a distinct same-class object."""
    est.fit(y)
    cloned = clone(est)

    assert isinstance(cloned, type(est))
    assert cloned is not est
import pmdarima as pm
from pmdarima.model_selection import train_test_split
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import BoxCoxEndogTransformer
import pickle

# Load the sunspots series and hold out everything after the first 2700 points
y = pm.datasets.load_sunspots()
train, test = train_test_split(y, train_size=2700)

# Box-Cox transform followed by a seasonal AutoARIMA search
boxcox_stage = BoxCoxEndogTransformer(lmbda2=1e-6)  # lmbda2 avoids negative values
arima_stage = pm.AutoARIMA(seasonal=True, m=12,
                           suppress_warnings=True,
                           trace=True)
pipeline = Pipeline([
    ('boxcox', boxcox_stage),
    ('arima', arima_stage),
])
pipeline.fit(train)

# The fitted pipeline pickles like any scikit-learn estimator
with open('model.pkl', 'wb') as pkl:
    pickle.dump(pipeline, pkl)

# Reload the pickle written above and forecast from the restored object
with open('model.pkl', 'rb') as pkl:
    mod = pickle.load(pkl)
    print(mod.predict(15))
# [25.20580375 25.05573898 24.4263037 23.56766793 22.67463049 21.82231043
# 21.04061069 20.33693017 19.70906027 19.1509862 18.6555793 18.21577243
# 17.8250318 17.47750614 17.16803394]
def forecast(self, forecast_horizon: int = 96):
    """Fit a Box-Cox + seasonal AutoARIMA pipeline and store its forecasts.

    The pipeline is fit on ``self.training_data['close']``, pickled to
    ``intermediates/arima_<currency_pair>.pkl``, reloaded, and asked for
    ``forecast_horizon`` predictions with confidence intervals. The results
    are stored on ``self.forecasts``, ``self.errors``,
    ``self.forecasts_lower``, ``self.forecasts_upper`` and
    ``self.forecasts_raw``, and also written to a CSV under ``output/``.

    Parameters
    ----------
    forecast_horizon : int, default 96
        Number of future periods to predict.
    """
    super().forecast(forecast_horizon)

    pair = self.currency_pair.upper()
    # Interpolate the values into the message. Passing the "{}" template and
    # the values as separate print() arguments printed the braces literally.
    print(
        f"Running ARIMA forecast for Currency-pair: {pair} "
        f"using forecast horizon: {forecast_horizon}"
    )

    print("Dataset: ", pair)
    print(self.training_data.head(5))
    print(".....\t.........\t...")
    print(self.training_data.tail(5))

    # define and fit the pipeline/model
    pipeline = Pipeline([
        ('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)),
        ('arima', pm.AutoARIMA(start_p=1, start_q=1,
                               max_p=3, max_q=3,
                               d=1, D=1, start_P=0,
                               error_action='ignore',
                               suppress_warnings=True,
                               stepwise=True,
                               seasonal=True, m=12,
                               trace=True)),
    ])
    pipeline.fit(self.training_data['close'])

    # serialize model
    model_file = f"intermediates/arima_{self.currency_pair}.pkl"
    with open(model_file, "wb") as file:
        pickle.dump(pipeline, file)

    # Reload the file written just above, so predictions come from the
    # persisted artifact (never unpickle files from untrusted sources)
    with open(model_file, "rb") as file:
        model = pickle.load(file)

    # make the forecasts; with return_conf_int=True the result is indexed as
    # [0] point forecasts and [1] (lower, upper) bounds per period
    predictions = model.predict(n_periods=forecast_horizon,
                                return_conf_int=True)
    print("ARIMA forecast ... complete")

    collated_results = DataFrame.from_records([
        {
            "forecast": value,
            # half the width of the confidence interval
            "error": abs(bounds[0] - bounds[1]) / 2,
            "forecast_lower": bounds[0],
            "forecast_upper": bounds[1],
        }
        for value, bounds in zip(predictions[0], predictions[1])
    ])

    self.forecasts = collated_results["forecast"]
    self.errors = collated_results["error"]
    self.forecasts_lower = collated_results["forecast_lower"]
    self.forecasts_upper = collated_results["forecast_upper"]
    self.forecasts_raw = collated_results

    collated_results.to_csv(
        f"output/{self.currency_pair}__{self.model_name.lower()}__{forecast_horizon}__forecasts.csv"
    )
    print(collated_results)
import numpy as np from unittest import mock y = load_wineind() exogenous = np.random.RandomState(1).rand(y.shape[0], 2) @pytest.mark.parametrize('cv', [ SlidingWindowForecastCV(window_size=100, step=24, h=1), RollingForecastCV(initial=150, step=12, h=1), ]) @pytest.mark.parametrize('est', [ ARIMA(order=(2, 1, 1)), ARIMA( order=(1, 1, 2), seasonal_order=(0, 1, 1, 12), suppress_warnings=True), Pipeline([("fourier", FourierFeaturizer(m=12)), ("arima", ARIMA(order=(2, 1, 0), maxiter=3))]) ]) @pytest.mark.parametrize('verbose', [0, 2, 4]) @pytest.mark.parametrize('exog', [None, exogenous]) def test_cv_scores(cv, est, verbose, exog): scores = cross_val_score(est, y, exogenous=exog, scoring='mean_squared_error', cv=cv, verbose=verbose) assert isinstance(scores, np.ndarray) @pytest.mark.parametrize('cv', [ SlidingWindowForecastCV(window_size=100, step=12, h=12),
def train_arima_model(data_train, date_init, date_fin, op_red, type_day,
                      transform='decompose-Fourier',
                      type_decompose='additive', n_decompose=1,
                      n_coeff_fourier=4, filter_decompose=None):
    """Fit an AutoARIMA pipeline for one series and persist it.

    NaN entries are dropped from ``data_train`` before fitting, and
    ``data_train.name`` is used as the cluster identifier when saving.
    ``transform`` selects the preprocessing variant:

    * ``'decompose-Fourier'`` / ``'decompose-Fourier-log'``: Fourier features
      plus non-seasonal AutoARIMA, fit on the trend/residual component.
    * ``'Fourier'``: Fourier features plus non-seasonal AutoARIMA, fit on the
      cleaned series.
    * ``'decompose'``: seasonal AutoARIMA, fit on the trend/residual component.
    * ``'normal'``: seasonal AutoARIMA, fit on the cleaned series.

    ``filter_decompose`` is accepted but not used by this function.

    Raises
    ------
    ValueError
        If ``transform`` is not one of the values listed above.
    """
    num_cluster = data_train.name
    raw_values = np.array(data_train)
    data_train = raw_values[~np.isnan(np.array(data_train))]
    type_model = 'arima'

    # AutoARIMA settings shared by every branch
    search_kwargs = dict(trace=True, error_action='ignore', maxiter=30,
                         max_p=4, max_q=4, suppress_warnings=True,
                         with_intercept=True)
    # Leading arguments of save_model_dir shared by every branch
    # (the fitted pipeline itself is passed first)
    save_args = (transform, num_cluster, op_red, type_day, type_model,
                 date_init, date_fin)

    if transform in ('decompose-Fourier', 'decompose-Fourier-log'):
        print('n_decompose: ', n_decompose, 'n_coeff_fourier: ', n_coeff_fourier)
        (forecast_seasonal, trend_residual, n_diffs,
         periods_decompose, m_f, k_f) = get_transform_model(
            data_train, transform=transform, type_decompose=type_decompose,
            n_decompose=n_decompose, n_coeff_fourier=n_coeff_fourier)

        fitted = Pipeline([
            ('fourier', ppc.FourierFeaturizer(m=m_f, k=k_f)),
            ("model", pm.AutoARIMA(d=n_diffs, seasonal=False, **search_kwargs)),
        ])
        print('\t\t\t training model...')
        fitted.fit(trend_residual)
        print(fitted.summary())
        print('\t\t\t saving model...')
        save_model_dir(fitted, *save_args, periods_decompose,
                       str(n_decompose), type_decompose)
        print('\t\t\t finish save model...')

    elif transform == 'Fourier':
        n_diffs, m_f, k_f = get_transform_model(
            data_train, transform=transform, n_coeff_fourier=n_coeff_fourier)

        fitted = Pipeline([
            ('fourier', ppc.FourierFeaturizer(m=m_f, k=k_f)),
            ("model", pm.AutoARIMA(d=n_diffs, seasonal=False, **search_kwargs)),
        ])
        fitted.fit(data_train)
        save_model_dir(fitted, *save_args)

    elif transform == 'decompose':
        (forecast_seasonal, trend_residual, n_diffs, ns_diffs,
         periods_decompose, m_f) = get_transform_model(
            data_train, transform=transform, type_decompose=type_decompose,
            n_decompose=n_decompose)

        fitted = Pipeline([
            ("model", pm.AutoARIMA(d=n_diffs, D=ns_diffs, seasonal=True,
                                   m=m_f, **search_kwargs)),
        ])
        fitted.fit(trend_residual)
        save_model_dir(fitted, *save_args, periods_decompose,
                       str(n_decompose), type_decompose)

    elif transform == 'normal':
        n_diffs, ns_diffs, m_f = get_transform_model(data_train,
                                                     transform=transform)

        fitted = Pipeline([
            ("model", pm.AutoARIMA(d=n_diffs, D=ns_diffs, seasonal=True,
                                   m=m_f, **search_kwargs)),
        ])
        fitted.fit(data_train)
        save_model_dir(fitted, *save_args)

    else:
        raise ValueError('invalid variable transform {}.'.format(transform))