import cupy as cp
import numpy as np

import cuml
from cuml.common.import_utils import has_shap
from cuml.experimental.explainer import KernelExplainer
from cuml.model_selection import train_test_split


def test_kernel_gpu_cpu_shap(dtype, n_features, n_background, model):
    X_train, X_test, y_train, y_test = create_synthetic_dataset(
        n_samples=n_background + 3,
        n_features=n_features,
        test_size=3,
        noise=0.1,
        dtype=dtype)

    mod = model().fit(X_train, y_train)

    explainer, shap_values = get_shap_values(
        model=mod.predict,
        background_dataset=X_train,
        explained_dataset=X_test,
        explainer=KernelExplainer)

    exp_v = explainer.expected_value
    fx = mod.predict(X_test)

    # Additivity check: for each explained row, the SHAP values should
    # sum to f(x) - expected_value.
    for test_idx in range(3):
        assert abs(np.sum(shap_values[test_idx]) -
                   (fx[test_idx] - exp_v)) <= 1e-5

    if has_shap():
        import shap
        explainer = shap.KernelExplainer(mod.predict, cp.asnumpy(X_train))
        cpu_shap_values = explainer.shap_values(cp.asnumpy(X_test))
        assert np.allclose(shap_values, cpu_shap_values,
                           rtol=1e-01, atol=1e-01)
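# The test above relies on two module-level helpers that are defined
# elsewhere in the test suite. Below is a minimal sketch of plausible
# implementations, assuming cuml.datasets.make_regression and cuml's
# train_test_split; names, defaults, and behavior are illustrative,
# not the actual helpers.

def create_synthetic_dataset(n_samples, n_features, test_size, noise,
                             dtype, random_state=42):
    # Hypothetical helper: generate a GPU regression dataset and hold
    # out `test_size` rows to be explained.
    X, y = cuml.datasets.make_regression(n_samples=n_samples,
                                         n_features=n_features,
                                         noise=noise,
                                         random_state=random_state)
    X = X.astype(dtype)
    y = y.astype(dtype)
    return train_test_split(X, y, test_size=test_size,
                            random_state=random_state)


def get_shap_values(model, background_dataset, explained_dataset,
                    explainer):
    # Hypothetical helper: construct the explainer over the background
    # data and return it together with the SHAP values for the
    # explained rows.
    exp = explainer(model=model, data=background_dataset)
    return exp, exp.shap_values(explained_dataset)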
def test_kernel_gpu_cpu_shap(dtype, nfeatures, nbackground, model):
    X, y = cuml.datasets.make_regression(n_samples=nbackground + 5,
                                         n_features=nfeatures,
                                         noise=0.1)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=5, random_state=42)

    X_train = X_train.astype(dtype)
    X_test = X_test.astype(dtype)
    y_train = y_train.astype(dtype)
    y_test = y_test.astype(dtype)

    mod = model().fit(X_train, y_train)

    cu_explainer = cuml.experimental.explainer.KernelExplainer(
        model=mod.predict, data=X_train, is_gpu_model=True)

    cu_shap_values = cu_explainer.shap_values(X_test)

    exp_v = cu_explainer.expected_value
    fx = mod.predict(X_test)

    # Same additivity check as above.
    for test_idx in range(5):
        assert abs(np.sum(cu_shap_values[test_idx]) -
                   (fx[test_idx] - exp_v)) <= 1e-5

    if has_shap("0.37"):
        import shap
        explainer = shap.KernelExplainer(mod.predict, cp.asnumpy(X_train))
        shap_values = explainer.shap_values(cp.asnumpy(X_test))

        # Small variances in the l1 regression step with larger
        # n_features, even between runs of the same explainer, can make
        # this comparison flaky; a better testing strategy is in
        # progress.
        assert np.allclose(cu_shap_values, shap_values,
                           rtol=1e-01, atol=1e-01)
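# Both kernel-SHAP tests above take (dtype, n_features/nfeatures,
# n_background/nbackground, model) as pytest parameters, so the original
# modules carry @pytest.mark.parametrize decorators that are not shown
# here. An illustrative parametrization (values and models assumed, not
# verbatim from the original) would look like:
#
#     @pytest.mark.parametrize('dtype', [np.float32, np.float64])
#     @pytest.mark.parametrize('nfeatures', [10, 50])
#     @pytest.mark.parametrize('nbackground', [50])
#     @pytest.mark.parametrize('model', [cuml.LinearRegression,
#                                        cuml.svm.SVR])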
import pytest
import treelite
import numpy as np
import cupy as cp
import cudf
from cuml.experimental.explainer.tree_shap import TreeExplainer
from cuml.common.import_utils import has_xgboost, has_shap
from cuml.common.exceptions import NotFittedError
from cuml.ensemble import RandomForestRegressor as curfr
from cuml.ensemble import RandomForestClassifier as curfc
from sklearn.datasets import make_regression, make_classification

if has_xgboost():
    import xgboost as xgb
if has_shap():
    import shap


@pytest.mark.parametrize('objective', ['reg:linear', 'reg:squarederror',
                                       'reg:squaredlogerror',
                                       'reg:pseudohubererror'])
@pytest.mark.skipif(not has_xgboost(), reason="need to install xgboost")
@pytest.mark.skipif(not has_shap(), reason="need to install shap")
def test_xgb_regressor(objective):
    n_samples = 100
    X, y = make_regression(n_samples=n_samples, n_features=8,
                           n_informative=8, n_targets=1,