def test_gradient(key, data, dtype):
    """Test batched gradient implementation against scipy non-batched
    gradient.

    The batched gradient returned by ``_loglike_grad`` is compared with a
    reference assembled series by series: for each batch member a
    single-series model is built and ``approx_fprime`` is applied to its
    ``_loglike``.

    .. note:: it doesn't test that the loglikelihood is correct!
    """
    order, seasonal_order, intercept = extract_order(key)
    # Step size handed to both gradient routines
    h = 1e-8

    _, y_train_cudf, _, _, _, exog_past_cudf, *_ = get_dataset(data, dtype)

    # Create cuML model
    cuml_model = arima.ARIMA(endog=y_train_cudf,
                             exog=exog_past_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept)

    # Length of each series' slice of the packed parameter vector
    N = cuml_model.complexity

    # Get an estimate of the parameters and pack them into a vector
    cuml_model._estimate_x0()
    x = cuml_model.pack()

    # Compute the batched loglikelihood gradient
    batched_grad = cuml_model._loglike_grad(x, h)

    # Iterate over the batch to compute a reference gradient
    scipy_grad = np.zeros(N * data.batch_size)
    for i in range(data.batch_size):
        # Select the exogenous columns that belong to series i (if any)
        if exog_past_cudf is None:
            exog_i = None
        else:
            exog_cols = exog_past_cudf.columns[
                data.n_exog * i:data.n_exog * (i + 1)]
            exog_i = exog_past_cudf[exog_cols]

        # Create a model with only the current series
        model_i = arima.ARIMA(endog=y_train_cudf[y_train_cudf.columns[i]],
                              exog=exog_i,
                              order=order,
                              seasonal_order=seasonal_order,
                              fit_intercept=intercept)

        # The bound method is used directly as the objective function
        block = slice(N * i, N * (i + 1))
        scipy_grad[block] = approx_fprime(x[block], model_i._loglike, h)

    # Compare
    np.testing.assert_allclose(batched_grad, scipy_grad,
                               rtol=0.001, atol=0.01)
def test_start_params(key, data, dtype):
    """Compare cuML's starting parameters with the ``start_params`` of the
    equivalent statsmodels SARIMAX models.
    """
    order, seasonal_order, intercept = extract_order(key)

    y, y_cudf = get_dataset(data, dtype)

    # Batched cuML model
    cuml_model = arima.ARIMA(y_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept)

    # One statsmodels reference model per column of the dataset
    trend = 'c' if intercept else 'n'
    ref_model = []
    for col in y.columns:
        ref_model.append(sm.tsa.SARIMAX(y[col],
                                        order=order,
                                        seasonal_order=seasonal_order,
                                        trend=trend))

    # Gather the first N reference starting parameters of every series
    # into one flat vector, silencing any warning raised on the way
    N = cuml_model.complexity
    nb = data.batch_size
    x_ref = np.zeros(N * nb, dtype=dtype)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        for ib in range(nb):
            lo, hi = ib * N, (ib + 1) * N
            x_ref[lo:hi] = ref_model[ib].start_params[:N]

    # Starting parameters as estimated by cuML
    cuml_model._estimate_x0()
    x_cuml = cuml_model.pack()

    # Both vectors must agree within tolerance
    np.testing.assert_allclose(x_cuml, x_ref, rtol=0.001, atol=0.01)
def test_loglikelihood(key, data, dtype, simple_differencing):
    """Check the cuML loglikelihood against statsmodels when both models
    hold the same parameter values.
    """
    order, seasonal_order, intercept = extract_order(key)

    # Only the cuDF version of the dataset is needed here
    _, y_cudf = get_dataset(data, dtype)

    # Fitted statsmodels references
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # cuML model built on the same data
    model_kwargs = dict(order=order,
                        seasonal_order=seasonal_order,
                        fit_intercept=intercept,
                        simple_differencing=simple_differencing)
    cuml_model = arima.ARIMA(y_cudf, **model_kwargs)

    # Copy the reference parameters into the cuML model
    _statsmodels_to_cuml(ref_fits, cuml_model, order, seasonal_order,
                         intercept, dtype)

    # Loglikelihood on both sides
    cuml_llf = cuml_model.llf
    ref_llf = np.array([fit.llf for fit in ref_fits])

    np.testing.assert_allclose(cuml_llf, ref_llf, rtol=0.01, atol=0.01)
def test_integration(key, data, dtype):
    """Full integration test: estimate, fit, predict (in- and
    out-of-sample)
    """
    order, seasonal_order, intercept = extract_order(key)

    # Only the cuDF version of the dataset is needed here
    _, y_cudf = get_dataset(data, dtype)

    # Fitted statsmodels references
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # Build the cuML model and fit it
    cuml_model = arima.ARIMA(y_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept,
                             output_type='numpy')
    cuml_model.fit()

    # cuML predictions over [start, end)
    cuml_pred = cuml_model.predict(data.start, data.end)

    # Reference predictions, one column per series; statsmodels is asked
    # for start..end - 1
    ref_preds = np.zeros((data.end - data.start, data.batch_size))
    for series in range(data.batch_size):
        prediction = ref_fits[series].get_prediction(data.start,
                                                     data.end - 1)
        ref_preds[:, series] = prediction.predicted_mean

    # The same tolerance is used as relative and absolute bound
    np.testing.assert_allclose(cuml_pred, ref_preds,
                               rtol=data.tolerance_integration,
                               atol=data.tolerance_integration)
def _predict_common(key, data, dtype, start, end, num_steps=None):
    """Shared body of test_predict and test_forecast.

    The cuML model receives the parameters of the fitted statsmodels
    references, then its output is compared with the reference
    predictions over ``start`` to ``end - 1``. ``predict(start, end)`` is
    used when ``num_steps`` is None, ``forecast(num_steps)`` otherwise.
    """
    order, seasonal_order, intercept = extract_order(key)

    # Only the cuDF version of the dataset is needed here
    _, y_cudf = get_dataset(data, dtype)

    # Fitted statsmodels references
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # cuML model on the same data
    cuml_model = arima.ARIMA(y_cudf, order, seasonal_order,
                             fit_intercept=intercept,
                             output_type='numpy')

    # Copy the reference parameters into the cuML model
    _statsmodels_to_cuml(ref_fits, cuml_model, order, seasonal_order,
                         intercept, dtype)

    # Reference predictions, one column per series
    ref_preds = np.zeros((end - start, data.batch_size))
    for series in range(data.batch_size):
        prediction = ref_fits[series].get_prediction(start, end - 1)
        ref_preds[:, series] = prediction.predicted_mean

    # cuML side: forecast only when a number of steps was requested
    if num_steps is not None:
        cuml_pred = cuml_model.forecast(num_steps)
    else:
        cuml_pred = cuml_model.predict(start, end)

    np.testing.assert_allclose(cuml_pred, ref_preds, rtol=0.001, atol=0.01)
def test_gradient(test_case, dtype):
    """Compare the batched loglikelihood gradient with a reference built
    series by series from a non-batched finite-difference helper.

    Note: it doesn't test that the loglikelihood is correct!
    """
    key, data = test_case
    order, seasonal_order, intercept = extract_order(key)
    p, _, q = order
    P, _, Q, _ = seasonal_order
    # Length of each series' slice of the packed parameter vector
    N = p + P + q + Q + intercept + 1
    h = 1e-8

    # Only the cuDF version of the dataset is needed here
    _, y_cudf = get_dataset(data, dtype)

    # Batched cuML model
    cuml_model = arima.ARIMA(y_cudf, order, seasonal_order,
                             fit_intercept=intercept)

    # Estimate the parameters and pack them into one flat vector
    cuml_model._estimate_x0()
    x = cuml_model.pack()

    # Gradient computed on the whole batch at once
    batched_grad = cuml_model._loglike_grad(x, h)

    # Reference gradient, filled in one series at a time
    scipy_grad = np.zeros(N * data.batch_size)
    for series in range(data.batch_size):
        # Model holding only the current series
        single_model = arima.ARIMA(y_cudf[y_cudf.columns[series]],
                                   order, seasonal_order,
                                   fit_intercept=intercept)

        block = slice(N * series, N * (series + 1))
        scipy_grad[block] = _approx_fprime_helper(
            x[block], single_model._loglike, h)

    np.testing.assert_allclose(batched_grad, scipy_grad,
                               rtol=0.001, atol=0.01)
def test_start_params(key, data, dtype):
    """Compare cuML's starting parameters with the ``start_params`` of the
    equivalent statsmodels SARIMAX models, exogenous variables included.
    """
    order, seasonal_order, intercept = extract_order(key)

    dataset = get_dataset(data, dtype)
    y_train, y_train_cudf, _, _, exog_past, exog_past_cudf, *_ = dataset

    # fillna for reference to match cuML initial estimation strategy
    y_train_nona = fill_interpolation(y_train)

    # Convert to numpy to avoid misaligned indices
    if exog_past is not None:
        exog_past_np = exog_past.to_numpy()

    # Batched cuML model
    cuml_model = arima.ARIMA(endog=y_train_cudf,
                             exog=exog_past_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept)

    # One statsmodels reference model per series, each with its own
    # block of exogenous columns when there are any
    trend = 'c' if intercept else 'n'
    ref_model = []
    for i in range(data.batch_size):
        if data.n_exog:
            exog_i = exog_past_np[:, i * data.n_exog:(i + 1) * data.n_exog]
        else:
            exog_i = None
        ref_model.append(
            sm.tsa.SARIMAX(endog=y_train_nona[y_train_nona.columns[i]],
                           exog=exog_i,
                           order=order,
                           seasonal_order=seasonal_order,
                           trend=trend))

    # Gather the first N reference starting parameters of every series
    # into one flat vector, silencing any warning raised on the way
    N = cuml_model.complexity
    nb = data.batch_size
    x_ref = np.zeros(N * nb, dtype=dtype)
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        for ib in range(nb):
            lo, hi = ib * N, (ib + 1) * N
            x_ref[lo:hi] = ref_model[ib].start_params[:N]

    # Starting parameters as estimated by cuML
    cuml_model._estimate_x0()
    x_cuml = cuml_model.pack()

    np.testing.assert_allclose(x_cuml, x_ref, rtol=0.001, atol=0.01)
def test_integration(key, data, dtype):
    """Full integration test: estimate, fit, forecast.

    Passes when the MASE of the cuML forecast is below the MASE of the
    statsmodels forecast scaled by ``1 + data.tolerance_integration``.
    """
    order, seasonal_order, intercept = extract_order(key)
    # Period given to the MASE computation, never below 1
    s = max(1, seasonal_order[3])

    (y_train, y_train_cudf, y_test, _, _, exog_past_cudf, exog_fut,
     _) = get_dataset(data, dtype)

    # Fitted statsmodels references
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # Build the cuML model and fit it
    cuml_model = arima.ARIMA(endog=y_train_cudf,
                             exog=exog_past_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept,
                             output_type='numpy')
    cuml_model.fit()

    # cuML forecast over the test range
    y_fc_cuml = cuml_model.forecast(data.n_test, exog=exog_fut)

    # Reference forecast, one column per series
    y_fc_ref = np.zeros((data.n_test, data.batch_size))
    for series in range(data.batch_size):
        if data.n_exog == 0:
            exog_i = None
        else:
            exog_cols = exog_fut.columns[
                data.n_exog * series:data.n_exog * (series + 1)]
            exog_i = exog_fut[exog_cols]
        prediction = ref_fits[series].get_prediction(
            data.n_train, data.n_obs - 1, exog=exog_i)
        y_fc_ref[:, series] = prediction.predicted_mean

    # Compare results: MASE must be better or within the tolerance margin
    mase_ref = mase(y_train, y_test, y_fc_ref, s)
    mase_cuml = mase(y_train, y_test, y_fc_cuml, s)
    upper_bound = mase_ref * (1. + data.tolerance_integration)
    assert mase_cuml < upper_bound
def _predict_common(key, data, dtype, start, end, num_steps=None, level=None,
                    simple_differencing=True):
    """Utility function used by test_predict and test_forecast to avoid
    code duplication.

    Parameters
    ----------
    key, data, dtype
        Test case description, forwarded to ``extract_order``,
        ``get_dataset`` and ``get_ref_fit``.
    start, end
        Bounds of the compared range; the statsmodels reference is
        predicted over ``start`` to ``end - 1``.
    num_steps
        When None the cuML model is queried with ``predict(start, end)``,
        otherwise with ``forecast(num_steps)``.
    level
        When not None, lower and upper bounds returned by
        ``forecast(num_steps, level)`` are also compared with the
        ``mean_ci_lower`` / ``mean_ci_upper`` columns of the reference
        forecast summary (``alpha = 1 - level``). Requires ``num_steps``.
    simple_differencing
        Forwarded to the cuML ``ARIMA`` constructor.

    Raises
    ------
    ValueError
        If ``level`` is given without ``num_steps``.
    """
    # Bounds are only produced by the forecast branch; without this check
    # the bound comparison below would hit unbound names.
    if level is not None and num_steps is None:
        raise ValueError("level requires num_steps to be set")

    order, seasonal_order, intercept = extract_order(key)

    # Only the cuDF version of the dataset is needed here
    _, y_cudf = get_dataset(data, dtype)

    # Get fit reference model
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # Create cuML model
    cuml_model = arima.ARIMA(y_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept,
                             output_type='numpy',
                             simple_differencing=simple_differencing)

    # Feed the parameters to the cuML model
    _statsmodels_to_cuml(ref_fits, cuml_model, order, seasonal_order,
                         intercept, dtype)

    # Predict or forecast
    # Reference (statsmodels)
    ref_preds = np.zeros((end - start, data.batch_size))
    for i in range(data.batch_size):
        ref_preds[:, i] = ref_fits[i].get_prediction(
            start, end - 1).predicted_mean
    if level is not None:
        ref_lower = np.zeros((end - start, data.batch_size))
        ref_upper = np.zeros((end - start, data.batch_size))
        for i in range(data.batch_size):
            temp_pred = ref_fits[i].get_forecast(num_steps)
            ci = temp_pred.summary_frame(alpha=1 - level)
            ref_lower[:, i] = ci["mean_ci_lower"].to_numpy()
            ref_upper[:, i] = ci["mean_ci_upper"].to_numpy()

    # cuML
    if num_steps is None:
        cuml_pred = cuml_model.predict(start, end)
    elif level is not None:
        cuml_pred, cuml_lower, cuml_upper = \
            cuml_model.forecast(num_steps, level)
    else:
        cuml_pred = cuml_model.forecast(num_steps)

    # Compare results
    np.testing.assert_allclose(cuml_pred, ref_preds, rtol=0.001, atol=0.01)
    if level is not None:
        np.testing.assert_allclose(
            cuml_lower, ref_lower, rtol=0.005, atol=0.01)
        np.testing.assert_allclose(
            cuml_upper, ref_upper, rtol=0.005, atol=0.01)