Example #1
def test_tree_shap():
    np.random.seed(0)

    alibi_model = make_tree_shap()
    tree_shap = TreeShap(alibi_model)
    adult = fetch_adult()
    X_test = adult.data[30001:, :]
    explanation = tree_shap.explain(X_test[0:1].tolist())
    exp_json = json.loads(explanation.to_json())
    assert exp_json["meta"]["name"] == "TreeShap"
Example #2
def test_tree_shap():
    os.environ.clear()
    alibi_model = os.path.join(kfserving.Storage.download(ADULT_EXPLAINER_URI),
                               EXPLAINER_FILENAME)
    with open(alibi_model, "rb") as f:
        alibi_model = dill.load(f)
        tree_shap = TreeShap(alibi_model)
        adult = fetch_adult()
        X_test = adult.data[30001:, :]
        np.random.seed(0)
        explanation = tree_shap.explain(X_test[0:1].tolist())
        exp_json = json.loads(explanation.to_json())
        print(exp_json)
Example #3
def __init__(self):
    adult = fetch_adult()
    data = adult.data
    target = adult.target
    self.feature_names = adult.feature_names
    self.category_map = adult.category_map
    np.random.seed(0)
    data_perm = np.random.permutation(np.c_[data, target])
    data = data_perm[:, :-1]
    target = data_perm[:, -1]
    idx = 30000
    self.X_train, self.Y_train = data[:idx, :], target[:idx]
    self.X_test, self.Y_test = data[idx + 1:, :], target[idx + 1:]
Example #4
def test_adult(return_X_y):
    data = fetch_adult(return_X_y=return_X_y)
    if return_X_y:
        assert len(data) == 2
        X, y = data
    else:
        assert len(data) == 5
        X = data.data
        y = data.target

    assert X.ndim == ADULT_DIM
    assert X.shape[1] == ADULT_FEATURES
    assert len(X) == len(y)
    assert len(set(y)) == ADULT_CLASSES
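The module-level constants referenced by this test are not shown in the listing. A minimal sketch of plausible definitions, assuming alibi's fetch_adult, which returns a 2-D feature array with 12 columns and a binary target (the names mirror the test; the exact values are assumptions, not from the source):

# Assumed constants for the excerpt above (not shown in the original file).
ADULT_DIM = 2        # X.ndim: a 2-D array of samples x features
ADULT_FEATURES = 12  # Age, Workclass, ..., Country
ADULT_CLASSES = 2    # binary income target: <=50K vs >50K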
Example #5
def adult_dataset():
    """
    Loads and preprocesses Adult dataset.
    """

    # load raw data
    adult = fetch_adult()
    data = adult.data
    target = adult.target
    feature_names = adult.feature_names
    category_map = adult.category_map

    # split it
    idx = 30000
    X_train, Y_train = data[:idx, :], target[:idx]
    X_test, Y_test = data[idx + 1:, :], target[idx + 1:]

    # Create feature transformation pipeline
    ordinal_features = [
        x for x in range(len(feature_names))
        if x not in list(category_map.keys())
    ]
    ordinal_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ])

    categorical_features = list(category_map.keys())
    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ])
    preprocessor = ColumnTransformer(transformers=[
        ('num', ordinal_transformer, ordinal_features),
        ('cat', categorical_transformer, categorical_features),
    ])
    preprocessor.fit(X_train)

    return {
        'X_train': X_train,
        'y_train': Y_train,
        'X_test': X_test,
        'y_test': Y_test,
        'preprocessor': preprocessor,
        'metadata': {
            'feature_names': feature_names,
            'category_map': category_map,
            'name': 'adult'
        }
    }
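A minimal usage sketch for the function above, assuming a scikit-learn classifier downstream; LogisticRegression is illustrative and not part of the source:

from sklearn.linear_model import LogisticRegression

dataset = adult_dataset()
pre = dataset['preprocessor']  # already fit on X_train above

clf = LogisticRegression(max_iter=1000)
clf.fit(pre.transform(dataset['X_train']), dataset['y_train'])
print(dataset['metadata']['name'], 'test accuracy:',
      clf.score(pre.transform(dataset['X_test']), dataset['y_test']))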
Example #6
def test_kernel_shap():
    os.environ.clear()
    alibi_model = os.path.join(kfserving.Storage.download(ADULT_EXPLAINER_URI),
                               EXPLAINER_FILENAME)
    with open(alibi_model, "rb") as f:
        skmodel = SKLearnServer(ADULT_MODEL_URI)
        skmodel.load()
        alibi_model = dill.load(f)
        kernel_shap = KernelShap(skmodel.predict, alibi_model)
        adult = fetch_adult()
        X_test = adult.data[30001:, :]
        np.random.seed(0)
        explanation = kernel_shap.explain(X_test[0:1].tolist())
        exp_json = json.loads(explanation.to_json())
        print(exp_json)
Example #7
def tf_keras_adult(tf_keras_adult_model):
    # fetch data
    adult = fetch_adult()
    X = adult.data
    X_ord = np.c_[X[:, 1:8], X[:, 11], X[:, 0], X[:, 8:11]]
    y = adult.target

    # scale numerical features
    X_num = X_ord[:, -4:].astype(np.float32, copy=False)
    xmin, xmax = X_num.min(axis=0), X_num.max(axis=0)
    rng = (-1., 1.)
    X_num_scaled = (X_num - xmin) / (xmax - xmin) * (rng[1] - rng[0]) + rng[0]

    # OHE categorical features
    X_cat = X_ord[:, :-4].copy()
    ohe = OneHotEncoder()
    ohe.fit(X_cat)
    X_cat_ohe = ohe.transform(X_cat)

    # combine categorical and numerical data
    X_comb = np.c_[X_cat_ohe.todense(), X_num_scaled].astype(np.float32,
                                                             copy=False)

    # split in train and test set
    idx = 30000
    X_train, y_train = X_comb[:idx, :], y[:idx]

    assert X_train.shape[1] == 57

    # set random seed
    np.random.seed(1)
    tf.set_random_seed(1)

    model = tf_keras_adult_model
    model.fit(X_train,
              to_categorical(y_train),
              batch_size=128,
              epochs=5,
              verbose=0)

    # create categorical variable dict
    cat_vars_ord = {}
    n_categories = 8
    for i in range(n_categories):
        cat_vars_ord[i] = len(np.unique(X_ord[:, i]))
    cat_vars_ohe = ord_to_ohe(X_ord, cat_vars_ord)[1]

    return X_train, model, cat_vars_ohe
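For context, ord_to_ohe comes from alibi.utils.mapping: given ordinally encoded data and a mapping {column index: number of categories}, its second return value maps the first one-hot column of each categorical variable to its category count. A tiny illustration (inputs and the printed mapping are illustrative, not from the source):

import numpy as np
from alibi.utils.mapping import ord_to_ohe

# Two categorical columns with 3 and 2 categories respectively.
X_ord_demo = np.array([[0, 1], [2, 0], [1, 1]])
cat_vars_ord_demo = {0: 3, 1: 2}
_, cat_vars_ohe_demo = ord_to_ohe(X_ord_demo, cat_vars_ord_demo)
print(cat_vars_ohe_demo)  # e.g. {0: 3, 3: 2}: OHE start column -> n_categories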
Example #8
def test_anchor_tabular():

    alibi_model = os.path.join(
        kfserving.Storage.download(ADULT_EXPLAINER_URI), EXPLAINER_FILENAME
    )
    with open(alibi_model, "rb") as f:
        skmodel = SKLearnServer(ADULT_MODEL_URI)
        skmodel.load()
        alibi_model = dill.load(f)
        anchor_tabular = AnchorTabular(skmodel.predict, alibi_model)
        adult = fetch_adult()
        X_test = adult.data[30001:, :]
        np.random.seed(0)
        explanation = anchor_tabular.explain(X_test[0:1].tolist())
        exp_json = json.loads(explanation.to_json())
        assert exp_json["data"]["anchor"][0] == "Marital Status = Never-Married"
Example #9
def test_adult(return_X_y):
    try:
        data = fetch_adult(return_X_y=return_X_y)
    except RequestException:
        pytest.skip('Adult dataset URL down')
    if return_X_y:
        assert len(data) == 2
        X, y = data
    else:
        assert len(data) == 5
        X = data.data
        y = data.target

    assert X.ndim == ADULT_DIM
    assert X.shape[1] == ADULT_FEATURES
    assert len(X) == len(y)
    assert len(set(y)) == ADULT_CLASSES
Example #10
def test_anchor_tabular():
    os.environ.clear()
    alibi_model = os.path.join(kserve.Storage.download(ADULT_EXPLAINER_URI),
                               EXPLAINER_FILENAME)
    with open(alibi_model, "rb") as f:
        skmodel = SKLearnModel("adult", ADULT_MODEL_URI)
        skmodel.load()
        predictor = Predictor(skmodel)
        alibi_model = dill.load(f)
        anchor_tabular = AnchorTabular(predictor.predict_fn, alibi_model)
        adult = fetch_adult()
        X_test = adult.data[30001:, :]
        np.random.seed(0)
        explanation = anchor_tabular.explain(X_test[0:1].tolist())
        exp_json = json.loads(explanation.to_json())
        assert exp_json["data"]["anchor"][0] == "Relationship = Own-child" or \
               exp_json["data"]["anchor"][0] == "Age <= 28.00"
Example #11
def make_tree_shap(dirname: Optional[Path] = None) -> TreeShap:
    np.random.seed(0)

    # get X_train for explainer fit
    adult = fetch_adult()
    data = adult.data
    target = adult.target
    data_perm = np.random.permutation(np.c_[data, target])
    data = data_perm[:, :-1]
    target = data_perm[:, -1]
    idx = 30000
    X_train, y_train = data[:idx, :], target[:idx]
    X_test, y_test = data[idx + 1:, :], target[idx + 1:]

    d_train = xgboost.DMatrix(X_train, label=y_train)
    d_test = xgboost.DMatrix(X_test, label=y_test)

    params = {
        "eta": 0.01,
        "objective": "binary:logistic",
        "subsample": 0.5,
        "base_score": np.mean(y_train),
        "eval_metric": "logloss",
    }
    model = xgboost.train(
        params,
        d_train,
        5000,
        evals=[(d_test, "test")],
        verbose_eval=100,
        early_stopping_rounds=20,
    )

    tree_explainer = TreeShap(model, model_output="raw", task="classification")
    tree_explainer.fit(X_train)

    if dirname is not None:
        tree_explainer.save(dirname)
    return tree_explainer
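A sketch of querying the returned explainer on held-out rows; the slicing mirrors the tests above, and the shap_values layout assumes alibi's Explanation API (one attribution array per model output):

explainer = make_tree_shap()
adult = fetch_adult()
X_test = adult.data[30001:, :]  # rows chosen only for illustration

explanation = explainer.explain(X_test[:1])
print(explanation.meta['name'])          # 'TreeShap'
print(explanation.shap_values[0].shape)  # (1, n_features) attributions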
Example #12
def make_anchor_tabular_income(
        dirname: Optional[Path] = None) -> AnchorTabular:
    # adapted from:
    # https://docs.seldon.io/projects/alibi/en/latest/examples/anchor_tabular_adult.html
    np.random.seed(0)

    # prepare data
    adult = fetch_adult()
    data = adult.data
    target = adult.target
    feature_names = adult.feature_names
    category_map = adult.category_map

    data_perm = np.random.permutation(np.c_[data, target])
    data = data_perm[:, :-1]
    target = data_perm[:, -1]

    # build model
    idx = 30000
    X_train, Y_train = data[:idx, :], target[:idx]
    X_test, Y_test = data[idx + 1:, :], target[idx + 1:]

    ordinal_features = [
        x for x in range(len(feature_names))
        if x not in list(category_map.keys())
    ]
    ordinal_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ])

    categorical_features = list(category_map.keys())
    categorical_transformer = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ])

    preprocessor = ColumnTransformer(transformers=[
        ("num", ordinal_transformer, ordinal_features),
        ("cat", categorical_transformer, categorical_features),
    ])

    clf = RandomForestClassifier(n_estimators=50)

    model_pipeline = Pipeline(steps=[
        ("preprocess", preprocessor),
        ("classifier", clf),
    ])

    model_pipeline.fit(X_train, Y_train)

    explainer = AnchorTabular(model_pipeline.predict,
                              feature_names,
                              categorical_names=category_map,
                              seed=1)

    explainer.fit(X_train, disc_perc=[25, 50, 75])

    if dirname is not None:
        explainer.save(dirname)
    return explainer
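And a possible way to query the resulting anchor explainer; the anchor and precision fields follow alibi's documented Explanation schema, and the row is chosen only for illustration:

explainer = make_anchor_tabular_income()
adult = fetch_adult()
X_test = adult.data[30001:, :]

explanation = explainer.explain(X_test[0], threshold=0.95)
print('Anchor:', ' AND '.join(explanation.anchor))
print('Precision: %.2f' % explanation.precision)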
Example #13

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.datasets import fetch_adult
import joblib
import dill
from sklearn.pipeline import Pipeline
import alibi

# load data
adult = fetch_adult()
data = adult.data
targets = adult.target
feature_names = adult.feature_names
category_map = adult.category_map

# define train and test set
np.random.seed(0)
data_perm = np.random.permutation(np.c_[data, targets])
data = data_perm[:, :-1]
labels = data_perm[:, -1]

idx = 30000
# use the permuted labels so rows and targets stay aligned
X_train, Y_train = data[:idx, :], labels[:idx]
X_test, Y_test = data[idx + 1:, :], labels[idx + 1:]