def test_with_oob():
    """Check out-of-sample scoring, its edge cases and basic forecasting."""
    n_held_out = 10

    # Fit while holding out the trailing samples, scored with MSE
    arima = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        scoring='mse',
        out_of_sample_size=n_held_out,
    ).fit(y=hr)

    oob = arima.oob()
    assert not np.isnan(oob)

    # One stored prediction per held-out sample; re-scoring them by hand
    # has to reproduce the reported score
    oob_preds = arima.oob_preds_
    assert oob_preds.shape[0] == n_held_out
    mse = val.get_scoring_metric('mse')
    recomputed = mse(hr[-n_held_out:], oob_preds)
    assert recomputed == oob

    # A negative out_of_sample_size still fits; the score is then NaN
    arima = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=-1,
    ).fit(y=hr)
    assert np.isnan(arima.oob())

    # n_periods passed as a string is rejected...
    with pytest.raises(TypeError):
        arima.predict(n_periods="5")

    # ...whereas an integer n_periods forecasts fine (result not inspected)
    arima.predict(n_periods=5)

    # Fitting must fail when out_of_sample_size exceeds the sample count
    with pytest.raises(ValueError):
        ARIMA(order=(2, 1, 2), out_of_sample_size=1000).fit(hr)
def test_with_oob_and_exog(as_pd, scoring):
    """Fit with exogenous features and a held-out tail, then fetch the score.

    Parameters
    ----------
    as_pd : bool
        When truthy, the inputs are wrapped in pandas containers before
        fitting.
    scoring
        Forwarded unchanged as the ``scoring`` argument of ``ARIMA``.
    """
    # Three seeded random feature columns, one row per sample of ``hr``
    exog = np.random.RandomState(1).rand(hr.shape[0], 3)
    endog = hr

    if as_pd:
        exog = pd.DataFrame.from_records(exog)
        endog = pd.Series(hr)

    arima = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        scoring=scoring,
        out_of_sample_size=10,
    ).fit(y=endog, exogenous=exog)

    # Retrieving the out-of-sample score must not raise
    arima.oob()
def test_oob_sarimax():
    """Compare a seasonal fit that holds out 15 samples with a truncated fit.

    One model is fit on all of ``wineind`` with ``out_of_sample_size=15``;
    the other is fit on everything but the last 15 samples with no hold-out.
    """
    n_held_out = 15
    xreg = rs.rand(wineind.shape[0], 2)

    fit = ARIMA(
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, 12),
        out_of_sample_size=n_held_out,
    ).fit(y=wineind, exogenous=xreg)

    fit_no_oob = ARIMA(
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, 12),
        out_of_sample_size=0,
        suppress_warnings=True,
    ).fit(y=wineind[:-n_held_out], exogenous=xreg[:-n_held_out, :])

    oob = fit.oob()

    # Scoring the truncated model's forecast of the held-out tail should
    # land close to the score reported by the hold-out model
    scoring = get_callable(fit_no_oob.scoring, VALID_SCORING)
    no_oob_preds = fit_no_oob.predict(
        n_periods=n_held_out,
        exogenous=xreg[-n_held_out:, :],
    )
    manual_score = scoring(wineind[-n_held_out:], no_oob_preds)
    assert np.allclose(oob, manual_score, rtol=1e-2)

    # Both models should carry (nearly) the same parameters
    assert np.allclose(fit.params(), fit_no_oob.params(), rtol=1e-2)

    # After feeding the tail to the truncated model, forecasts should agree
    xreg_test = rs.rand(5, 2)
    fit_no_oob.add_new_observations(
        wineind[-n_held_out:],
        xreg[-n_held_out:, :],
    )
    with_oob_forecast = fit.predict(5, xreg_test)
    no_oob_forecast = fit_no_oob.predict(5, xreg_test)
    assert np.allclose(with_oob_forecast, no_oob_forecast, rtol=1e-2)

    # Forecasts and confidence intervals both come back as ndarrays
    preds, conf = fit.predict(5, xreg_test, return_conf_int=True)
    assert isinstance(preds, np.ndarray) and isinstance(conf, np.ndarray)
def test_with_oob():
    """Check out-of-sample scoring edge cases and basic forecasting."""
    # With 10 samples held out, the score is a real number
    arima = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=10,
    ).fit(y=hr)
    assert not np.isnan(arima.oob())

    # A negative out_of_sample_size still fits; the score is then NaN
    arima = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=-1,
    ).fit(y=hr)
    assert np.isnan(arima.oob())

    # n_periods passed as a string is rejected...
    assert_raises(TypeError, arima.predict, n_periods="5")

    # ...whereas an integer n_periods forecasts fine (result not inspected)
    arima.predict(n_periods=5)

    # Fitting must fail when out_of_sample_size exceeds the sample count
    oversized = ARIMA(order=(2, 1, 2), out_of_sample_size=1000)
    assert_raises(ValueError, oversized.fit, hr)
def test_oob_sarimax():
    """Compare a seasonal fit that holds out 15 samples with a truncated fit.

    One model is fit on all of ``wineind`` with ``out_of_sample_size=15``;
    the other is fit on everything but the last 15 samples with no hold-out.
    Both are capped at ``maxiter=5``.
    """
    n_held_out = 15
    xreg = rs.rand(wineind.shape[0], 2)

    fit = ARIMA(
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, 12),
        maxiter=5,
        out_of_sample_size=n_held_out,
    ).fit(y=wineind, exogenous=xreg)

    fit_no_oob = ARIMA(
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, 12),
        out_of_sample_size=0,
        maxiter=5,
        suppress_warnings=True,
    ).fit(y=wineind[:-n_held_out], exogenous=xreg[:-n_held_out, :])

    oob = fit.oob()

    # Scoring the truncated model's forecast of the held-out tail should
    # land close to the score reported by the hold-out model
    scoring = val.get_scoring_metric(fit_no_oob.scoring)
    no_oob_preds = fit_no_oob.predict(
        n_periods=n_held_out,
        exogenous=xreg[-n_held_out:, :],
    )
    manual_score = scoring(wineind[-n_held_out:], no_oob_preds)
    assert np.allclose(oob, manual_score, rtol=1e-2)

    # At this point the two parameter vectors are expected to differ
    assert not np.allclose(fit.params(), fit_no_oob.params(), rtol=1e-2)

    # After updating the truncated model with the tail, forecasts should
    # agree
    xreg_test = rs.rand(5, 2)
    fit_no_oob.update(wineind[-n_held_out:], xreg[-n_held_out:, :])
    with_oob_forecast = fit.predict(5, xreg_test)
    no_oob_forecast = fit_no_oob.predict(5, xreg_test)
    assert np.allclose(with_oob_forecast, no_oob_forecast, rtol=1e-2)

    # ...and so should the parameters (default allclose tolerances)
    assert np.allclose(fit.params(), fit_no_oob.params())

    # Forecasts and confidence intervals both come back as ndarrays
    preds, conf = fit.predict(5, xreg_test, return_conf_int=True)
    assert isinstance(preds, np.ndarray) and isinstance(conf, np.ndarray)
def test_oob_for_issue_28():
    """Out-of-sample scoring with an exogenous array (regression, issue 28).

    A model fit on all of ``hr`` with 10 held-out samples is compared with
    a model fit only on the first ``n - 10`` samples, before and after the
    latter is given the remaining observations.
    """
    n_held_out = 10
    xreg = rs.rand(hr.shape[0], 4)

    arima = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=n_held_out,
    ).fit(y=hr, exogenous=xreg)

    oob = arima.oob()
    assert not np.isnan(oob)

    # The stored endog matches the full original series, while the model's
    # own (differenced) endog is one row shorter: original length - d
    results = arima.arima_res_
    assert np.allclose(results.data.endog, hr, rtol=1e-2)
    assert results.model.endog.shape[0] == hr.shape[0] - 1

    # The same holds for the exogenous array
    assert np.allclose(results.data.exog, xreg, rtol=1e-2)
    assert results.model.exog.shape[0] == xreg.shape[0] - 1

    # Reference model: same order, fit on everything but the held-out tail
    # and without any out-of-sample scoring. Scoring its forecast of the
    # tail should reproduce the hold-out model's score.
    arima_no_oob = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=0,
    ).fit(y=hr[:-n_held_out], exogenous=xreg[:-n_held_out, :])

    scoring = get_callable(arima_no_oob.scoring, VALID_SCORING)
    preds = arima_no_oob.predict(
        n_periods=n_held_out,
        exogenous=xreg[-n_held_out:, :],
    )
    assert np.allclose(oob, scoring(hr[-n_held_out:], preds), rtol=1e-2)

    # The fitted parameters of both models agree within rtol=1e-2
    xreg_test = rs.rand(5, 4)
    assert np.allclose(arima.params(), arima_no_oob.params(), rtol=1e-2)

    # The forecasts must NOT match yet: the reference model has not seen
    # the last observations
    with_oob_forecasts = arima.predict(n_periods=5, exogenous=xreg_test)
    no_oob_forecasts = arima_no_oob.predict(n_periods=5,
                                            exogenous=xreg_test)
    assert_raises(AssertionError, assert_array_almost_equal,
                  with_oob_forecasts, no_oob_forecasts)

    # Adding observations is validated. It fails with no exogenous array...
    add_obs = arima_no_oob.add_new_observations
    assert_raises(ValueError, add_obs, hr[-n_held_out:], None)

    # ...with an exogenous array whose row count does not match...
    assert_raises(ValueError, add_obs, hr[-n_held_out:], xreg_test)

    # ...and with an exogenous array that has a different column count
    assert_raises(ValueError, add_obs, hr[-n_held_out:], xreg_test[:, :2])

    # Once the tail really is added, both models should forecast the same
    arima_no_oob.add_new_observations(hr[-n_held_out:],
                                      xreg[-n_held_out:, :])
    updated_forecasts = arima_no_oob.predict(n_periods=5,
                                             exogenous=xreg_test)
    assert np.allclose(with_oob_forecasts, updated_forecasts, rtol=1e-2)