コード例 #1
0
def test_gradient(key, data, dtype):
    """
    Test batched gradient implementation against scipy non-batched
    gradient.

    The gradient returned by ``_loglike_grad`` for the whole batch is
    compared, slice by slice, with ``approx_fprime`` applied to the
    ``_loglike`` of a model built from a single series.

    .. note:: it doesn't test that the loglikelihood is correct!

    Parameters
    ----------
    key
        Passed to ``extract_order`` to obtain the order, the seasonal
        order and the intercept flag.
    data
        Test-case description; ``batch_size`` and ``n_exog`` are read
        from it and it is forwarded to ``get_dataset``.
    dtype
        Forwarded to ``get_dataset``.
    """
    order, seasonal_order, intercept = extract_order(key)
    # Step passed to both the batched gradient and approx_fprime
    h = 1e-8

    _, y_train_cudf, _, _, _, exog_past_cudf, *_ = get_dataset(data, dtype)

    # Create cuML model
    cuml_model = arima.ARIMA(endog=y_train_cudf,
                             exog=exog_past_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept)

    # Length of each series' slice in the packed parameter vector
    N = cuml_model.complexity

    # Get an estimate of the parameters and pack them into a vector
    cuml_model._estimate_x0()
    x = cuml_model.pack()

    # Compute the batched loglikelihood gradient
    batched_grad = cuml_model._loglike_grad(x, h)

    # Iterate over the batch to compute a reference gradient
    scipy_grad = np.zeros(N * data.batch_size)
    for i in range(data.batch_size):
        # Bounds of the i-th series' parameters in the packed vectors
        lo, hi = N * i, N * (i + 1)

        # Exogenous columns belonging to the i-th series, if any
        if exog_past_cudf is None:
            exog_i = None
        else:
            exog_cols = exog_past_cudf.columns[data.n_exog * i:
                                               data.n_exog * (i + 1)]
            exog_i = exog_past_cudf[exog_cols]

        # Create a model with only the current series
        model_i = arima.ARIMA(endog=y_train_cudf[y_train_cudf.columns[i]],
                              exog=exog_i,
                              order=order,
                              seasonal_order=seasonal_order,
                              fit_intercept=intercept)

        scipy_grad[lo:hi] = approx_fprime(x[lo:hi], model_i._loglike, h)

    # Compare
    np.testing.assert_allclose(batched_grad, scipy_grad, rtol=0.001, atol=0.01)
コード例 #2
0
ファイル: test_arima.py プロジェクト: vinaydes/cuml
def test_start_params(key, data, dtype):
    """Compare cuML's initial parameter estimates with the starting
    parameters computed by statsmodels for each series of the batch.
    """
    order, seasonal_order, intercept = extract_order(key)
    y, y_cudf = get_dataset(data, dtype)

    # Batched cuML model
    cuml_model = arima.ARIMA(y_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept)

    # One statsmodels model per column of the host dataset
    trend = 'c' if intercept else 'n'
    ref_model = []
    for col in y.columns:
        ref_model.append(
            sm.tsa.SARIMAX(y[col],
                           order=order,
                           seasonal_order=seasonal_order,
                           trend=trend))

    # Reference starting parameters, concatenated series after series
    N = cuml_model.complexity
    nb = data.batch_size
    x_ref = np.zeros(N * nb, dtype=dtype)
    with warnings.catch_warnings():
        # Silence any warning raised while statsmodels computes start_params
        warnings.filterwarnings("ignore")
        for ib in range(nb):
            begin = ib * N
            x_ref[begin:begin + N] = ref_model[ib].start_params[:N]

    # cuML starting parameters, packed in a single vector
    cuml_model._estimate_x0()
    x_cuml = cuml_model.pack()

    # Both vectors must agree within tolerance
    np.testing.assert_allclose(x_cuml, x_ref, rtol=0.001, atol=0.01)
コード例 #3
0
ファイル: test_arima.py プロジェクト: vinaydes/cuml
def test_loglikelihood(key, data, dtype, simple_differencing):
    """Check the cuML loglikelihood against statsmodels when both models
    use the same parameter values.
    """
    order, seasonal_order, intercept = extract_order(key)
    _, y_cudf = get_dataset(data, dtype)

    # Fitted reference models
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # cuML model built with the same configuration
    model_kwargs = dict(order=order,
                        seasonal_order=seasonal_order,
                        fit_intercept=intercept,
                        simple_differencing=simple_differencing)
    cuml_model = arima.ARIMA(y_cudf, **model_kwargs)

    # Copy the reference parameters into the cuML model
    _statsmodels_to_cuml(ref_fits, cuml_model, order, seasonal_order,
                         intercept, dtype)

    # Loglikelihood of each implementation
    cuml_llf = cuml_model.llf
    ref_llf = np.array([fit.llf for fit in ref_fits])

    # Both must agree within tolerance
    np.testing.assert_allclose(cuml_llf, ref_llf, rtol=0.01, atol=0.01)
コード例 #4
0
ファイル: test_arima.py プロジェクト: vinaydes/cuml
def test_integration(key, data, dtype):
    """End-to-end test: fit the cuML model, then compare its predictions
    (in- and out-of-sample) with those of the reference fits.
    """
    order, seasonal_order, intercept = extract_order(key)
    _, y_cudf = get_dataset(data, dtype)

    # Fitted reference models
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # Build and fit the cuML model
    cuml_model = arima.ARIMA(y_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept,
                             output_type='numpy')
    cuml_model.fit()

    # cuML predictions
    cuml_pred = cuml_model.predict(data.start, data.end)

    # Reference predictions, one column per series
    n_steps = data.end - data.start
    ref_preds = np.zeros((n_steps, data.batch_size))
    for col in range(data.batch_size):
        prediction = ref_fits[col].get_prediction(data.start, data.end - 1)
        ref_preds[:, col] = prediction.predicted_mean

    # The same tolerance is used as relative and absolute bound
    tol = data.tolerance_integration
    np.testing.assert_allclose(cuml_pred, ref_preds, rtol=tol, atol=tol)
コード例 #5
0
ファイル: test_arima.py プロジェクト: teju85/cuml
def _predict_common(key, data, dtype, start, end, num_steps=None):
    """Shared body of test_predict and test_forecast.

    The cuML model receives the parameters of the reference fits, then its
    output is compared with the reference predictions. ``predict(start,
    end)`` is used when ``num_steps`` is None, ``forecast(num_steps)``
    otherwise.
    """
    order, seasonal_order, intercept = extract_order(key)
    _, y_cudf = get_dataset(data, dtype)

    # Fitted reference models
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # cuML model with the same configuration
    cuml_model = arima.ARIMA(y_cudf, order, seasonal_order,
                             fit_intercept=intercept, output_type='numpy')

    # Copy the reference parameters into the cuML model
    _statsmodels_to_cuml(ref_fits, cuml_model, order, seasonal_order,
                         intercept, dtype)

    # Reference predictions, one column per series
    ref_preds = np.zeros((end - start, data.batch_size))
    for col in range(data.batch_size):
        prediction = ref_fits[col].get_prediction(start, end - 1)
        ref_preds[:, col] = prediction.predicted_mean

    # cuML output: forecast when a number of steps is given, else predict
    if num_steps is not None:
        cuml_pred = cuml_model.forecast(num_steps)
    else:
        cuml_pred = cuml_model.predict(start, end)

    # Both must agree within tolerance
    np.testing.assert_allclose(cuml_pred, ref_preds, rtol=0.001, atol=0.01)
コード例 #6
0
def test_gradient(test_case, dtype):
    """Compare the batched loglikelihood gradient with a reference gradient
    computed series by series. Note: it doesn't test that the loglikelihood
    is correct!
    """
    key, data = test_case
    order, seasonal_order, intercept = extract_order(key)
    p, _, q = order
    P, _, Q, _ = seasonal_order
    # Length of each series' slice in the packed parameter vector
    N = p + q + P + Q + intercept + 1
    h = 1e-8

    _, y_cudf = get_dataset(data, dtype)

    # Batched cuML model
    cuml_model = arima.ARIMA(y_cudf,
                             order,
                             seasonal_order,
                             fit_intercept=intercept)

    # Estimate initial parameters and pack them in a single vector
    cuml_model._estimate_x0()
    x = cuml_model.pack()

    # Gradient computed for the whole batch at once
    batched_grad = cuml_model._loglike_grad(x, h)

    # Reference gradient, computed one series at a time
    scipy_grad = np.zeros(N * data.batch_size)
    for i in range(data.batch_size):
        lo, hi = N * i, N * (i + 1)

        # Model holding only the i-th series
        series_i = y_cudf[y_cudf.columns[i]]
        model_i = arima.ARIMA(series_i,
                              order,
                              seasonal_order,
                              fit_intercept=intercept)

        scipy_grad[lo:hi] = _approx_fprime_helper(
            x[lo:hi], lambda params: model_i._loglike(params), h)

    # Both gradients must agree within tolerance
    np.testing.assert_allclose(batched_grad, scipy_grad, rtol=0.001, atol=0.01)
コード例 #7
0
def test_start_params(key, data, dtype):
    """Compare cuML's initial parameter estimates with the starting
    parameters computed by statsmodels, with optional exogenous variables.
    """
    order, seasonal_order, intercept = extract_order(key)

    dataset = get_dataset(data, dtype)
    y_train, y_train_cudf, _, _, exog_past, exog_past_cudf, *_ = dataset

    # fillna for reference to match cuML initial estimation strategy
    y_train_nona = fill_interpolation(y_train)

    # Convert to numpy to avoid misaligned indices
    if exog_past is not None:
        exog_past_np = exog_past.to_numpy()

    # Batched cuML model
    cuml_model = arima.ARIMA(endog=y_train_cudf,
                             exog=exog_past_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept)

    # One statsmodels model per series of the batch
    trend = 'c' if intercept else 'n'
    ref_model = []
    for i in range(data.batch_size):
        endog_i = y_train_nona[y_train_nona.columns[i]]
        if data.n_exog:
            # Columns of the exogenous array belonging to the i-th series
            exog_i = exog_past_np[:, i * data.n_exog:(i + 1) * data.n_exog]
        else:
            exog_i = None
        ref_model.append(
            sm.tsa.SARIMAX(endog=endog_i,
                           exog=exog_i,
                           order=order,
                           seasonal_order=seasonal_order,
                           trend=trend))

    # Reference starting parameters, concatenated series after series
    N = cuml_model.complexity
    nb = data.batch_size
    x_ref = np.zeros(N * nb, dtype=dtype)
    with warnings.catch_warnings():
        # Silence any warning raised while statsmodels computes start_params
        warnings.filterwarnings("ignore")
        for ib in range(nb):
            begin = ib * N
            x_ref[begin:begin + N] = ref_model[ib].start_params[:N]

    # cuML starting parameters, packed in a single vector
    cuml_model._estimate_x0()
    x_cuml = cuml_model.pack()

    # Both vectors must agree within tolerance
    np.testing.assert_allclose(x_cuml, x_ref, rtol=0.001, atol=0.01)
コード例 #8
0
def test_integration(key, data, dtype):
    """End-to-end test: fit the cuML model, forecast the test period and
    check that its MASE is within the tolerance margin of the reference.
    """
    order, seasonal_order, intercept = extract_order(key)
    # The value passed to mase() is never lower than 1
    s = max(1, seasonal_order[3])

    (y_train, y_train_cudf, y_test, _, _, exog_past_cudf, exog_fut,
     _) = get_dataset(data, dtype)

    # Fitted reference models
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # Build and fit the cuML model
    cuml_model = arima.ARIMA(endog=y_train_cudf,
                             exog=exog_past_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept,
                             output_type='numpy')
    cuml_model.fit()

    # cuML forecast over the test period
    y_fc_cuml = cuml_model.forecast(data.n_test, exog=exog_fut)

    # Reference forecast, one column per series
    y_fc_ref = np.zeros((data.n_test, data.batch_size))
    for i in range(data.batch_size):
        if data.n_exog == 0:
            exog_i = None
        else:
            # Future exogenous columns belonging to the i-th series
            exog_cols = exog_fut.columns[data.n_exog * i:
                                         data.n_exog * (i + 1)]
            exog_i = exog_fut[exog_cols]
        prediction = ref_fits[i].get_prediction(data.n_train,
                                                data.n_obs - 1,
                                                exog=exog_i)
        y_fc_ref[:, i] = prediction.predicted_mean

    # Compare results: MASE must be better or within the tolerance margin
    mase_ref = mase(y_train, y_test, y_fc_ref, s)
    mase_cuml = mase(y_train, y_test, y_fc_cuml, s)
    margin = 1. + data.tolerance_integration
    assert mase_cuml < mase_ref * margin
コード例 #9
0
ファイル: test_arima.py プロジェクト: vinaydes/cuml
def _predict_common(key,
                    data,
                    dtype,
                    start,
                    end,
                    num_steps=None,
                    level=None,
                    simple_differencing=True):
    """Utility function used by test_predict and test_forecast to avoid
    code duplication.
    """
    order, seasonal_order, intercept = extract_order(key)

    y, y_cudf = get_dataset(data, dtype)

    # Get fit reference model
    ref_fits = get_ref_fit(data, order, seasonal_order, intercept, dtype)

    # Create cuML model
    cuml_model = arima.ARIMA(y_cudf,
                             order=order,
                             seasonal_order=seasonal_order,
                             fit_intercept=intercept,
                             output_type='numpy',
                             simple_differencing=simple_differencing)

    # Feed the parameters to the cuML model
    _statsmodels_to_cuml(ref_fits, cuml_model, order, seasonal_order,
                         intercept, dtype)

    # Predict or forecast
    # Reference (statsmodels)
    ref_preds = np.zeros((end - start, data.batch_size))
    for i in range(data.batch_size):
        ref_preds[:, i] = ref_fits[i].get_prediction(start,
                                                     end - 1).predicted_mean
    if level is not None:
        ref_lower = np.zeros((end - start, data.batch_size))
        ref_upper = np.zeros((end - start, data.batch_size))
        for i in range(data.batch_size):
            temp_pred = ref_fits[i].get_forecast(num_steps)
            ci = temp_pred.summary_frame(alpha=1 - level)
            ref_lower[:, i] = ci["mean_ci_lower"].to_numpy()
            ref_upper[:, i] = ci["mean_ci_upper"].to_numpy()
    # cuML
    if num_steps is None:
        cuml_pred = cuml_model.predict(start, end)
    elif level is not None:
        cuml_pred, cuml_lower, cuml_upper = \
            cuml_model.forecast(num_steps, level)
    else:
        cuml_pred = cuml_model.forecast(num_steps)

    # Compare results
    np.testing.assert_allclose(cuml_pred, ref_preds, rtol=0.001, atol=0.01)
    if level is not None:
        np.testing.assert_allclose(cuml_lower,
                                   ref_lower,
                                   rtol=0.005,
                                   atol=0.01)
        np.testing.assert_allclose(cuml_upper,
                                   ref_upper,
                                   rtol=0.005,
                                   atol=0.01)