Example #1
def test_kernel_gpu_cpu_shap(dtype, n_features, n_background, model):
    X_train, X_test, y_train, y_test = create_synthetic_dataset(
        n_samples=n_background + 3,
        n_features=n_features,
        test_size=3,
        noise=0.1,
        dtype=dtype)

    mod = model().fit(X_train, y_train)
    explainer, shap_values = get_shap_values(model=mod.predict,
                                             background_dataset=X_train,
                                             explained_dataset=X_test,
                                             explainer=KernelExplainer)

    exp_v = explainer.expected_value

    fx = mod.predict(X_test)
    for test_idx in range(3):
        assert (np.sum(shap_values[test_idx]) -
                abs(fx[test_idx] - exp_v)) <= 1e-5

    if has_shap():
        import shap
        explainer = shap.KernelExplainer(mod.predict, cp.asnumpy(X_train))
        cpu_shap_values = explainer.shap_values(cp.asnumpy(X_test))

        assert np.allclose(shap_values,
                           cpu_shap_values,
                           rtol=1e-01,
                           atol=1e-01)
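
All of these examples obtain their results through a get_shap_values test helper rather than by calling the explainer directly. A minimal sketch of what such a helper could look like, assuming cuml.explainer's KernelExplainer and PermutationExplainer accept the SHAP-style model=/data= keyword arguments and expose shap_values() and expected_value (the body below is an assumption for illustration, not the test suite's actual implementation):

from cuml.explainer import KernelExplainer


def get_shap_values(model, background_dataset, explained_dataset,
                    explainer=KernelExplainer):
    # Build the explainer from the model callable and the background data,
    # then compute SHAP values for the rows to be explained.
    exp = explainer(model=model, data=background_dataset)
    shap_values = exp.shap_values(explained_dataset)
    return exp, shap_values

The assertion loop above then checks SHAP's local accuracy property: for each explained row, the SHAP values should sum to the model output minus explainer.expected_value (the baseline prediction over the background data).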
Example #2
def test_kernel_shap_standalone(dtype, n_features, n_background, model):
    X_train, X_test, y_train, y_test = create_synthetic_dataset(
        n_samples=n_background + 3,
        n_features=n_features,
        test_size=3,
        noise=0.1,
        dtype=dtype)

    mod = model(n_components=3).fit(X_train, y_train)
    explainer, shap_values = get_shap_values(model=mod.transform,
                                             background_dataset=X_train,
                                             explained_dataset=X_test,
                                             explainer=KernelExplainer)

    exp_v = explainer.expected_value

    # We have 3 lists of shap values, one per component, since transform
    # returns arrays of shape (n_rows x n_components). For each test row and
    # each component, we check that the sum of the shap values matches the
    # difference between the transformed value of the row for that component
    # and the expected value for that component.
    for sv_idx in range(3):
        # pca and tsvd transform return results nested (2D), so take row 0
        fx = mod.transform(X_test[sv_idx].reshape(1, n_features))[0]

        for comp_idx in range(3):
            assert (np.sum(shap_values[comp_idx][sv_idx]) -
                    abs(fx[comp_idx] - exp_v[comp_idx])) <= 1e-5
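
The comment above is the additivity (local accuracy) property for a multi-output model: shap_values holds one (n_rows, n_features) array per output component and expected_value holds one baseline per component. A small pure-NumPy sketch of that check, written symmetrically (the helper name and tolerance are illustrative only):

import numpy as np


def check_multioutput_additivity(shap_values, fx, expected_value, tol=1e-5):
    # shap_values: list of (n_rows, n_features) arrays, one per output component
    # fx: (n_rows, n_components) model/transform output for the explained rows
    # expected_value: one expected (baseline) value per output component
    n_components = len(shap_values)
    n_rows = shap_values[0].shape[0]
    for comp in range(n_components):
        for row in range(n_rows):
            # sum of SHAP values == output for this component - its baseline
            diff = np.sum(shap_values[comp][row]) - (fx[row, comp] - expected_value[comp])
            assert abs(diff) <= tol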
Example #3
def test_exact_regression_datasets(exact_shap_regression_dataset, model):
    X_train, X_test, y_train, y_test = exact_shap_regression_dataset

    models = []
    models.append(model().fit(X_train, y_train))
    models.append(cuml_skl_class_dict[model]().fit(X_train, y_train))

    for mod in models:
        explainer, shap_values = get_shap_values(model=mod.predict,
                                                 background_dataset=X_train,
                                                 explained_dataset=X_test,
                                                 explainer=KernelExplainer)
        for i in range(3):
            print(i)
            assert_and_log(shap_values[i], golden_regression_results[model][i],
                           mod.predict(X_test[i].reshape(1, X_test.shape[1])),
                           explainer.expected_value)
Example #4
def test_regression_datasets(exact_shap_regression_dataset, model):
    X_train, X_test, y_train, y_test = exact_shap_regression_dataset

    models = []
    models.append(model().fit(X_train, y_train))
    models.append(cuml_skl_class_dict[model]().fit(X_train, y_train))

    for mod in models:
        explainer, shap_values = get_shap_values(
            model=mod.predict,
            background_dataset=X_train,
            explained_dataset=X_test,
            explainer=PermutationExplainer)

        fx = mod.predict(X_test)
        exp_v = explainer.expected_value

        for i in range(3):
            assert (np.sum(cp.asnumpy(shap_values[i])) -
                    abs(fx[i] - exp_v)) <= 1e-5
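
For reference, roughly the same PermutationExplainer check can be written end to end without the test fixtures. The sketch below uses cuml.datasets.make_regression and cuml.LinearRegression, assumes PermutationExplainer takes the same model=/data= arguments used above, and uses a deliberately loose tolerance because permutation SHAP values are estimates:

import cupy as cp
from cuml import LinearRegression
from cuml.datasets import make_regression
from cuml.explainer import PermutationExplainer

# 100 background rows plus 3 rows that will be explained
X, y = make_regression(n_samples=103, n_features=10, noise=0.1, random_state=42)
X_train, X_test = X[:-3], X[-3:]
y_train = y[:-3]

mod = LinearRegression().fit(X_train, y_train)
explainer = PermutationExplainer(model=mod.predict, data=X_train)
shap_values = explainer.shap_values(X_test)

fx = mod.predict(X_test)
exp_v = explainer.expected_value
for i in range(3):
    # local accuracy: a row's SHAP values sum to f(x) - E[f(X)]
    assert abs(float(cp.sum(shap_values[i]) - (fx[i] - exp_v))) <= 1e-2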
Example #5
def test_exact_classification_datasets(exact_shap_classification_dataset):
    X_train, X_test, y_train, y_test = exact_shap_classification_dataset

    models = []
    models.append(cuml.SVC(probability=True).fit(X_train, y_train))
    models.append(sklearn.svm.SVC(probability=True).fit(X_train, y_train))

    for mod in models:
        explainer, shap_values = get_shap_values(model=mod.predict_proba,
                                                 background_dataset=X_train,
                                                 explained_dataset=X_test,
                                                 explainer=KernelExplainer)

        # Some values are very small, which means our tolerance here needs to
        # be a little looser to avoid false positives from comparisons like
        # 0.00348627 - 0.00247397. The loose tolerance still tests that the
        # distribution of the values matches.
        for idx, svs in enumerate(shap_values):
            assert_and_log(svs[0],
                           golden_classification_result[idx],
                           float(mod.predict_proba(X_test)[0][idx]),
                           explainer.expected_value[idx],
                           tolerance=1e-01)
Example #6
def test_exact_classification_datasets(exact_shap_classification_dataset):
    X_train, X_test, y_train, y_test = exact_shap_classification_dataset

    models = []
    models.append(cuml.SVC(probability=True).fit(X_train, y_train))
    models.append(sklearn.svm.SVC(probability=True).fit(X_train, y_train))

    for mod in models:
        explainer, shap_values = get_shap_values(
            model=mod.predict_proba,
            background_dataset=X_train,
            explained_dataset=X_test,
            explainer=PermutationExplainer,
        )

        fx = mod.predict_proba(X_test)
        exp_v = explainer.expected_value

        for i in range(3):
            print(i, fx[i][1], shap_values[1][i])
            assert (np.sum(cp.asnumpy(shap_values[0][i])) -
                    abs(fx[i][0] - exp_v[0])) <= 1e-5
            assert (np.sum(cp.asnumpy(shap_values[1][i])) -
                    abs(fx[i][1] - exp_v[1])) <= 1e-5
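
Because predict_proba of a binary classifier returns columns that sum to one, the two per-class checks in the last loop mirror each other: fx[i][1] - exp_v[1] equals -(fx[i][0] - exp_v[0]), and since Shapley values are linear in the model output, the class-1 SHAP values of a row are (up to estimation noise) the negatives of the class-0 values. A tiny NumPy sketch of that relationship, purely illustrative and not part of the test suite:

import numpy as np


def binary_classes_are_mirrored(shap_values, atol=1e-4):
    # shap_values: list with one (n_rows, n_features) NumPy array per class
    sv0 = np.asarray(shap_values[0])
    sv1 = np.asarray(shap_values[1])
    # for a binary classifier explained through predict_proba, the two
    # classes' SHAP value arrays should be approximately negatives of each other
    return np.allclose(sv0, -sv1, atol=atol)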