def test_same():
    """Box-Cox with ``lmbda=0`` must match the ``LogEndogTransformer`` output."""
    endog = [1, 2, 3]

    # Run the same series through both transformers.
    boxcox_out, _ = BoxCoxEndogTransformer(lmbda=0).fit_transform(endog)
    log_out, _ = LogEndogTransformer().fit_transform(endog)

    assert_array_almost_equal(log_out, boxcox_out)
def test_invertible_when_lam2(self):
    """Round-trip ``self.y`` through a Box-Cox transform that sets ``lmbda2``."""
    original = self.y
    transformer = BoxCoxEndogTransformer(lmbda=2., lmbda2=2.)
    transformed, _ = transformer.fit_transform(original)

    # With lmbda2 set, inverting is expected to reproduce the input.
    restored, _ = transformer.inverse_transform(transformed)
    assert_array_almost_equal(original, restored)
def test_no_warning_on_ignore(self):
    """With ``neg_action="ignore"`` the transform must stay silent.

    The fit/transform step runs with ``UserWarning`` escalated to an error,
    so the test fails if the transformer warns. The round trip is still
    expected to be lossy, exactly as asserted below.
    """
    import warnings

    y = self.y
    trans = BoxCoxEndogTransformer(lmbda=2., neg_action="ignore")

    # Only UserWarning is escalated: it is the category the "warn" action is
    # expected to emit, and unrelated deprecation notices from dependencies
    # should not break this test.
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        y_t, _ = trans.fit_transform(y)

    # When we invert, it will not be the same
    y_prime, _ = trans.inverse_transform(y_t)
    assert not np.allclose(y_prime, y)
class TestIllegal:
    """Invalid ``Pipeline`` step definitions must be rejected on construction."""

    def test_non_unique_names(self):
        """Two steps sharing one name raise ``ValueError``."""
        duplicated = [
            ("stage", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ]
        with pytest.raises(ValueError) as exc_info:
            Pipeline(duplicated)
        assert "not unique" in pytest_error_str(exc_info)

    def test_names_in_params(self):
        """A step named like a Pipeline parameter ('steps') raises."""
        clashing = [
            ("steps", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ]
        with pytest.raises(ValueError) as exc_info:
            Pipeline(clashing)
        assert "names conflict" in pytest_error_str(exc_info)

    def test_names_double_underscore(self):
        """Step names containing the reserved "__" separator raise."""
        reserved = [
            ("stage__1", BoxCoxEndogTransformer()),
            ("stage", ARIMA(order=(0, 0, 0))),
        ]
        with pytest.raises(ValueError) as exc_info:
            Pipeline(reserved)
        assert "must not contain __" in pytest_error_str(exc_info)

    def test_non_transformer_in_steps(self):
        """A leading step that is not a transformer raises ``TypeError``."""
        not_a_transformer = (lambda *args, **kwargs: None)
        stages = [
            ("stage1", not_a_transformer),  # Fail
            ("stage2", AutoARIMA()),
        ]
        with pytest.raises(TypeError) as exc_info:
            Pipeline(stages)
        assert "instances of BaseTransformer" in pytest_error_str(exc_info)

    @pytest.mark.parametrize(
        'stages', [
            # Nothing BUT a transformer
            [("stage1", BoxCoxEndogTransformer())],

            # Two transformers
            [("stage1", BoxCoxEndogTransformer()),
             ("stage2", FourierFeaturizer(m=12))],
        ]
    )
    def test_bad_last_stage(self, stages):
        """A pipeline whose last step is not an estimator raises ``TypeError``."""
        with pytest.raises(TypeError) as exc_info:
            Pipeline(stages)
        assert "Last step of Pipeline should be" in pytest_error_str(exc_info)
def test_expected_warning(self):
    """``neg_action="warn"`` must emit a ``UserWarning`` while transforming."""
    original = self.y
    transformer = BoxCoxEndogTransformer(lmbda=2., neg_action="warn")

    with pytest.warns(UserWarning):
        transformed, _ = transformer.fit_transform(original)

    # The round trip is expected to be lossy for this input.
    restored, _ = transformer.inverse_transform(transformed)
    round_trips = np.allclose(restored, original)
    assert not round_trips
def test_invertible(X):
    """Default Box-Cox must round-trip ``loggamma`` and pass ``X`` through."""
    transformer = BoxCoxEndogTransformer()
    y_forward, X_forward = transformer.fit_transform(loggamma, X=X)
    y_back, X_back = transformer.inverse_transform(y_forward, X=X_forward)

    assert_array_almost_equal(loggamma, y_back)

    # X is expected to come back untouched from both directions.
    if X is not None:
        assert_array_almost_equal(X, X_forward)
        assert_array_almost_equal(X, X_back)
    else:
        assert X is X_forward is X_back is None
def test_invertible(exog):
    """Default Box-Cox must round-trip ``loggamma`` and pass ``exog`` through.

    ``exog`` is supplied by the surrounding test parametrization and may be
    ``None``.
    """
    trans = BoxCoxEndogTransformer()

    # Exogenous features are passed as ``X``: newer pmdarima releases use that
    # keyword in place of the legacy ``exogenous`` one.
    y_t, e_t = trans.fit_transform(loggamma, X=exog)
    y_prime, e_prime = trans.inverse_transform(y_t, X=e_t)

    assert_array_almost_equal(loggamma, y_prime)

    # exog should all be the same too
    if exog is None:
        assert exog is e_t is e_prime is None
    else:
        assert_array_almost_equal(exog, e_t)
        assert_array_almost_equal(exog, e_prime)
def test_names_double_underscore(self):
    """Step names containing the reserved "__" separator raise ``ValueError``."""
    reserved_name_steps = [
        ("stage__1", BoxCoxEndogTransformer()),
        ("stage", ARIMA(order=(0, 0, 0))),
    ]
    with pytest.raises(ValueError) as exc_info:
        Pipeline(reserved_name_steps)
    assert "must not contain __" in pytest_error_str(exc_info)
def test_names_in_params(self):
    """A step named 'steps' collides with a Pipeline parameter and raises."""
    clashing_steps = [
        ("steps", BoxCoxEndogTransformer()),
        ("stage", ARIMA(order=(0, 0, 0))),
    ]
    with pytest.raises(ValueError) as exc_info:
        Pipeline(clashing_steps)
    assert "names conflict" in pytest_error_str(exc_info)
def test_non_unique_names(self):
    """Two steps sharing the same name raise ``ValueError``."""
    duplicated_steps = [
        ("stage", BoxCoxEndogTransformer()),
        ("stage", ARIMA(order=(0, 0, 0))),
    ]
    with pytest.raises(ValueError) as exc_info:
        Pipeline(duplicated_steps)
    assert "not unique" in pytest_error_str(exc_info)
def test_pipeline_behavior():
    """Exercise fit, predict and update on a three-step pipeline."""
    arima = AutoARIMA(seasonal=False, stepwise=True,
                      suppress_warnings=True, d=1, max_p=2, max_q=0,
                      start_q=0, start_p=1, maxiter=3,
                      error_action='ignore')
    pipe = Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("boxcox", BoxCoxEndogTransformer()),
        ("arima", arima),
    ])

    # The pipeline reports one entry per step.
    assert len(pipe) == 3

    pipe.fit(train)
    forecast = pipe.predict(5)
    assert forecast.shape[0] == 5

    assert pipe._final_estimator.model_.fit_with_exog_

    # Manually (and incorrectly) overriding n_periods for the fourier step
    # must be rejected with a ValueError.
    with pytest.raises(ValueError) as exc_info:
        pipe.predict(3, fourier__n_periods=10)
    assert "'n_periods'" in pytest_error_str(exc_info)

    # The model can be updated with new observations...
    pipe.update(test, maxiter=5)

    # ...and the fourier transformer must have been updated along with it.
    fourier_step = pipe.steps_[0][1]
    assert fourier_step.n_ == wineind.shape[0]
("stage2", FourierFeaturizer(m=12))] ] ) def test_bad_last_stage(self, stages): # Will fail since the last stage is not an estimator with pytest.raises(TypeError) as ve: Pipeline(stages) assert "Last step of Pipeline should be" in pytest_error_str(ve) @pytest.mark.parametrize( 'pipe,kwargs,expected', [ pytest.param( Pipeline([ ("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA()) ]), {}, {"boxcox": {}, "arima": {}} ), pytest.param( Pipeline([ ("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA()) ]), {"boxcox__lmdba1": 0.001}, {"boxcox": {"lmdba1": 0.001}, "arima": {}} ), ]
def test_expected_error(self):
    """Transforming ``self.y`` with ``lmbda=2.`` raises ``ValueError``."""
    endog = self.y
    transformer = BoxCoxEndogTransformer(lmbda=2.)

    with pytest.raises(ValueError):
        transformer.fit_transform(endog)
import pickle

import pmdarima as pm
from pmdarima.model_selection import train_test_split
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import BoxCoxEndogTransformer

# Load the sunspots series and hold out everything after the first 2700 points.
y = pm.datasets.load_sunspots()
train, test = train_test_split(y, train_size=2700)

# Build the two-stage pipeline: Box-Cox on the endogenous series, then AutoARIMA.
boxcox_step = ('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6))  # lmbda2 avoids negative values
arima_step = (
    'arima',
    pm.AutoARIMA(seasonal=True, m=12, suppress_warnings=True, trace=True),
)
pipeline = Pipeline([boxcox_step, arima_step])
pipeline.fit(train)

# The fitted pipeline pickles like any scikit-learn estimator.
with open('model.pkl', 'wb') as out_file:
    pickle.dump(pipeline, out_file)

# Reload it and forecast straight from the restored object.
with open('model.pkl', 'rb') as in_file:
    mod = pickle.load(in_file)

print(mod.predict(15))
# [25.20580375 25.05573898 24.4263037 23.56766793 22.67463049 21.82231043
# 21.04061069 20.33693017 19.70906027 19.1509862 18.6555793 18.21577243
# 17.8250318 17.47750614 17.16803394]
def ARIMA(x, n_periods=14, normalize=False):
    """Select a seasonal ARIMA model, report hold-out metrics and forecast.

    Parameters
    ----------
    x : pandas.Series
        Time series with timestamps as its index.
    n_periods : int, default 14
        Number of trailing observations held out for evaluation, and the
        number of future daily steps to forecast.
    normalize : bool, default False
        When truthy, a Box-Cox transform is fitted on the training slice and
        the models are trained on the transformed series. Metrics, forecasts
        and confidence bounds are reported on the original scale.

    Returns
    -------
    The selected model, after it has been updated with the hold-out slice.

    Raises
    ------
    RuntimeError
        If no candidate model produced a usable hold-out score.

    Notes
    -----
    Side effects: prints the metrics, shows two matplotlib figures and writes
    ``forecasting.csv`` to the working directory.
    """
    # Split data; the first 60 observations are left out of training.
    train_raw = x[60:-n_periods]
    test_raw = x[-n_periods:]
    train, test = train_raw, test_raw

    # Box-Cox transformation. The raw hold-out is kept separately so the
    # evaluation metrics compare values on the same (original) scale.
    boxcox = None
    if normalize:
        boxcox = BoxCoxEndogTransformer(lmbda2=1e-6).fit(train_raw)
        train, _ = boxcox.transform(train_raw)
        test, _ = boxcox.transform(test_raw)

    # Train SARIMA candidates over d, D in {1, 2} and keep the lowest MAE.
    # Selection happens in model space: `test` and the candidate predictions
    # are both transformed when `normalize` is set.
    best_model = None
    best_score = np.inf
    for d in range(1, 3):
        for seasonal_d in range(1, 3):
            candidate = pm.auto_arima(train, m=7,
                                      max_p=3, max_q=3,
                                      max_P=3, max_Q=3,
                                      d=d, D=seasonal_d,
                                      max_order=12,
                                      stepwise=True,
                                      out_of_sample_size=n_periods,
                                      scoring='mae',
                                      information_criterion='oob',
                                      error_action='ignore',
                                      trace=False,
                                      suppress_warnings=True)
            candidate_pred = candidate.predict(n_periods=n_periods)
            mae = mean_absolute_error(test, candidate_pred)
            if mae < best_score:
                best_score = mae
                best_model = candidate

    if best_model is None:
        raise RuntimeError('No ARIMA candidate produced a valid hold-out score')

    # Evaluation metrics, computed on the original scale.
    # np.asarray drops any index the prediction carries, so the values are
    # aligned positionally with the hold-out timestamps.
    pred = np.asarray(best_model.predict(n_periods=n_periods))
    if normalize:
        pred, _ = boxcox.inverse_transform(pred)
    pred = pd.Series(np.asarray(pred), index=x.index[-n_periods:])

    r2 = round(r2_score(test_raw, pred), 2)
    rmse = round(np.sqrt(mean_squared_error(test_raw, pred)), 2)
    mae = round(mean_absolute_error(test_raw, pred), 2)
    smape_score = round(smape(test_raw, pred), 2)
    print('R2:', r2)
    print('RMSE is {}'.format(rmse))
    print('MAE is {}'.format(mae))
    print('SMAPE is {}'.format(smape_score))

    ax = x.plot(label='Observed', figsize=(14, 4), linewidth=3)
    pred.plot(ax=ax, label='Forecasting', linewidth=3)
    ax.set_xlabel('Date')
    ax.set_ylabel('Furniture Sales')
    plt.legend()
    plt.show()

    # Forecasting: daily steps starting the day after the last observation.
    start = x.index[-1] + pd.Timedelta(1, unit='D')
    end = start + pd.Timedelta(n_periods - 1, unit='D')
    time_range = pd.date_range(start, end, freq='D')

    # Feed the hold-out (in model space) to the *selected* model before
    # forecasting past the end of the series.
    best_model.update(test)
    pred, confi = best_model.predict(n_periods=n_periods,
                                     return_conf_int=True)
    pred = np.asarray(pred)
    confi = np.asarray(confi)
    if normalize:
        # Bring both the point forecast and its bounds back to the
        # original scale.
        pred, _ = boxcox.inverse_transform(pred)
        lower, _ = boxcox.inverse_transform(confi[:, 0])
        upper, _ = boxcox.inverse_transform(confi[:, 1])
        confi = np.column_stack([lower, upper])

    # Index by calendar date (time of day stripped), named 'Order Date'.
    forecast_index = pd.DatetimeIndex(time_range.normalize(),
                                      name='Order Date')
    pred = pd.DataFrame({'Forecasting': np.asarray(pred)},
                        index=forecast_index)
    # Share the forecast index so the concat below aligns row by row.
    confi = pd.DataFrame(confi, columns=['pred_lower', 'pred_upper'],
                         index=forecast_index)

    # Save results and plots.
    pd.concat([pred, confi], axis=1).to_csv('forecasting.csv', index=False)

    ax = x.plot(label='Observed', figsize=(14, 4), linewidth=3)
    pred.plot(ax=ax, label='Forecasting', linewidth=3)
    ax.fill_between(pred.index,
                    confi.iloc[:, 0],
                    confi.iloc[:, 1],
                    color='k', alpha=.25)
    ax.set_xlabel('Date')
    ax.set_ylabel('Furniture Sales')
    plt.legend()
    plt.show()

    return best_model
# Two transformers [("stage1", BoxCoxEndogTransformer()), ("stage2", FourierFeaturizer(m=12))] ]) def test_bad_last_stage(self, stages): # Will fail since the last stage is not an estimator with pytest.raises(TypeError) as ve: Pipeline(stages) assert "Last step of Pipeline should be" in pytest_error_str(ve) @pytest.mark.parametrize('pipe,kwargs,expected', [ pytest.param( Pipeline([("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA())]), {}, { "boxcox": {}, "arima": {} }), pytest.param( Pipeline([("boxcox", BoxCoxEndogTransformer()), ("arima", AutoARIMA())]), {"boxcox__lmdba1": 0.001}, { "boxcox": { "lmdba1": 0.001 }, "arima": {} }), ]) def test_get_kwargs(pipe, kwargs, expected): # Test we get the kwargs we expect
# Generate the example data set used for the grouped fit.
generated_data = generate_example_data(
    column_count=3,
    series_count=2,
    series_size=365 * 3,
    start_dt="2019-01-01",
    days_period=1,
)
training_data = generated_data.df
group_key_columns = generated_data.key_columns

# Template pipeline: Box-Cox on the endogenous series followed by AutoARIMA.
boxcox_step = (
    "box",
    BoxCoxEndogTransformer(lmbda2=0.4, neg_action="raise", floor=1e-12),
)
arima_step = (
    "arima",
    AutoARIMA(out_of_sample_size=60, max_p=4, max_q=4, max_d=4),
)
pipeline_obj = Pipeline(steps=[boxcox_step, arima_step])

# Fit the template against the grouped training data.
grouped_template = GroupedPmdarima(model_template=pipeline_obj)
pipeline_arima = grouped_template.fit(
    df=training_data,
    group_key_columns=group_key_columns,
    y_col="y",
    datetime_col="ds",
    silence_warnings=True,
)

# Save to local directory
save_dir = "/tmp/group_pmdarima/pipeline.gpmd"
pipeline_arima.save(save_dir)
def test_invertible_when_lambda_is_0():
    """A Box-Cox transform with ``lmbda=0.`` must round-trip its input."""
    original = [1, 2, 3]
    transformer = BoxCoxEndogTransformer(lmbda=0.)

    forward, _ = transformer.fit_transform(original)
    restored, _ = transformer.inverse_transform(forward)

    assert_array_almost_equal(original, restored)
def test_value_error_on_neg_lambda():
    """A negative ``lmbda2`` is rejected with a descriptive ``ValueError``."""
    transformer = BoxCoxEndogTransformer(lmbda2=-4.)

    with pytest.raises(ValueError) as exc_info:
        transformer.fit_transform([1, 2, 3])

    message = pytest_error_str(exc_info)
    assert 'lmbda2 must be a non-negative' in message
def forecast(self, forecast_horizon: int = 96):
    """Fit a Box-Cox + AutoARIMA pipeline on the close prices and forecast.

    Parameters
    ----------
    forecast_horizon : int, default 96
        Number of future periods to predict.

    Side effects
    ------------
    - Pickles the fitted pipeline to ``intermediates/arima_<pair>.pkl``.
    - Writes the collated forecasts to a CSV file under ``output/``.
    - Populates ``self.forecasts``, ``self.errors``, ``self.forecasts_lower``,
      ``self.forecasts_upper`` and ``self.forecasts_raw``.
    - Prints progress and the resulting table to stdout.
    """
    super().forecast(forecast_horizon)

    pair_label = self.currency_pair.upper()
    # Interpolate the values into the message; passing them as extra print()
    # arguments would leave the "{}" placeholders unfilled.
    print(f"Running ARIMA forecast for Currency-pair: {pair_label} "
          f"using forecast horizon: {forecast_horizon}")

    print("Dataset: ", pair_label)
    print(self.training_data.head(5))
    print(".....\t.........\t...")
    print(self.training_data.tail(5))

    # define and fit the pipeline/model
    pipeline = Pipeline([
        ('boxcox', BoxCoxEndogTransformer(lmbda2=1e-6)),
        ('arima', pm.AutoARIMA(start_p=1, start_q=1,
                               max_p=3, max_q=3,
                               d=1, D=1, start_P=0,
                               error_action='ignore',
                               suppress_warnings=True,
                               stepwise=True,
                               seasonal=True, m=12,
                               trace=True)),
    ])
    pipeline.fit(self.training_data['close'])

    # serialize model
    model_file = f"intermediates/arima_{self.currency_pair}.pkl"
    with open(model_file, "wb") as file:
        pickle.dump(pipeline, file)

    # load model back; the file was written just above by this method, so it
    # is trusted input for pickle.
    with open(model_file, "rb") as file:
        model = pickle.load(file)

    # make the forecasts: a (point forecasts, confidence bounds) pair
    point_forecasts, conf_bounds = model.predict(
        n_periods=forecast_horizon, return_conf_int=True)
    print("ARIMA forecast ... complete")

    # One record per step; "error" is half the width of the interval.
    collated_results = DataFrame.from_records([
        {
            "forecast": value,
            "error": abs(bounds[0] - bounds[1]) / 2,
            "forecast_lower": bounds[0],
            "forecast_upper": bounds[1],
        }
        for value, bounds in zip(point_forecasts, conf_bounds)
    ])

    self.forecasts = collated_results["forecast"]
    self.errors = collated_results["error"]
    self.forecasts_lower = collated_results["forecast_lower"]
    self.forecasts_upper = collated_results["forecast_upper"]
    self.forecasts_raw = collated_results

    collated_results.to_csv(
        f"output/{self.currency_pair}__{self.model_name.lower()}__{forecast_horizon}__forecasts.csv"
    )

    print(collated_results)