Example #1
0
def test_isolationforest():
    """Counterfactual explanations for an IsolationForest anomaly detector.

    Flips an inlier to an outlier (-1) and an outlier back to an
    inlier (+1), checking both the reported and the re-predicted label.
    """
    # Dense blob of inliers plus uniformly scattered outlier points.
    X, _ = make_blobs(n_samples=400, centers=[[0, 0], [0, 0]], cluster_std=0.5, n_features=2, random_state=42)
    X_outlier = np.random.RandomState(42).uniform(low=-6, high=6, size=(50, 2))

    model = IsolationForest(random_state=42)
    model.fit(X)

    # (query point, label the model should currently give, label we want)
    for x, y_source, y_target in ((X[0, :], 1, -1), (X_outlier[1, :], -1, 1)):
        assert model.predict([x]) == y_source

        x_cf, y_cf, _ = generate_counterfactual(model, x, y_target=y_target, return_as_dict=False)
        assert y_cf == y_target
        assert model.predict(np.array([x_cf])) == y_target
Example #2
0
def test_pipeline_pca_linearregression():
    """Counterfactuals for a PCA -> Lasso regression pipeline on boston.

    The target is reached when the prediction lands within +-3 of 20;
    three regularization/optimizer combinations are exercised.
    """
    # Load data
    X, y = load_boston(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    # Create and fit model
    model = make_pipeline(PCA(n_components=4), Lasso())
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    y_orig_pred = model.predict([x_orig])
    assert y_orig_pred >= 25 and y_orig_pred < 26

    # Compute counterfactual
    y_target = 20.
    y_target_done = lambda z: np.abs(z - y_target) < 3.

    for opts in (dict(regularization="l1", C=0.1, optimizer="bfgs"),
                 dict(regularization="l1", optimizer="mp"),
                 dict(regularization="l2", optimizer="mp")):
        x_cf, y_cf, _ = generate_counterfactual(model, x_orig, y_target=y_target, done=y_target_done, features_whitelist=None, return_as_dict=False, **opts)
        assert y_target_done(y_cf)
        assert y_target_done(model.predict(np.array([x_cf])))
Example #3
0
def test_isolationforest():
    """Counterfactuals for IsolationForest, plus wrapper sanity checks.

    Flips an inlier to -1 and an outlier to +1, checks the dict-style
    return value, and verifies the CEML wrapper agrees with the raw
    model and rejects non-IsolationForest inputs.
    """
    # Dense blob of inliers plus uniformly scattered outlier points.
    X, _ = make_blobs(n_samples=400, centers=[[0, 0], [0, 0]], cluster_std=0.5, n_features=2, random_state=42)
    X_outlier = np.random.RandomState(42).uniform(low=-6, high=6, size=(50, 2))

    model = IsolationForest(random_state=42)
    model.fit(X)

    # (query point, current label, requested label)
    for x, y_source, y_target in ((X[0, :], 1, -1), (X_outlier[1, :], -1, 1)):
        assert model.predict([x]) == y_source

        x_cf, y_cf, _ = generate_counterfactual(model, x, y_target=y_target, return_as_dict=False)
        assert y_cf == y_target
        assert model.predict(np.array([x_cf])) == y_target

    # Dict-style return for the last (outlier) query point.
    cf = generate_counterfactual(model, x, y_target=y_target, return_as_dict=True)
    assert cf["y_cf"] == y_target
    assert model.predict(np.array([cf["x_cf"]])) == y_target

    # Other stuff
    from ceml.sklearn import IsolationForest as IsolationForestCf
    model_cf = IsolationForestCf(model)
    assert model.predict([x]) == model_cf.predict(x)

    with pytest.raises(TypeError):
        IsolationForestCf(sklearn.linear_model.LogisticRegression())
Example #4
0
def test_decisiontree_regressor():
    """Counterfactuals for a shallow DecisionTreeRegressor on boston.

    Runs once without and once with a features whitelist; frozen
    features must stay exactly unchanged in the whitelisted run.
    """
    # Load data
    X, y = load_boston(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    # Create and fit model
    model = DecisionTreeRegressor(max_depth=3, random_state=42)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    y_orig_pred = model.predict([x_orig])
    assert y_orig_pred >= 19. and y_orig_pred < 21.

    # Compute counterfactual: done when the prediction is within +-1 of 25.
    y_target = 25.
    y_target_done = lambda z: np.abs(z - y_target) < 1.

    for features_whitelist in (None, [0, 2, 4, 5, 7, 8, 9, 12]):
        x_cf, y_cf, delta = generate_counterfactual(
            model,
            x_orig,
            y_target_done,
            features_whitelist=features_whitelist,
            regularization="l1",
            return_as_dict=False)
        assert y_target_done(y_cf)
        assert y_target_done(model.predict(np.array([x_cf])))
        if features_whitelist is not None:
            # Features outside the whitelist must not have been touched.
            assert all(delta[i] == 0. for i in range(x_orig.shape[0]) if i not in features_whitelist)
Example #5
0
def test_decisiontree_classifier():
    """Counterfactuals for a shallow DecisionTreeClassifier on iris.

    Flips the predicted class from 2 to 0, once without and once with a
    features whitelist; frozen features must remain exactly unchanged.
    """
    # Load data
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    # Create and fit model
    model = DecisionTreeClassifier(max_depth=3, random_state=42)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Compute counterfactuals targeting class 0.
    for features_whitelist in (None, [0, 2]):
        x_cf, y_cf, delta = generate_counterfactual(
            model,
            x_orig,
            0,
            features_whitelist=features_whitelist,
            regularization="l1",
            return_as_dict=False)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        if features_whitelist is not None:
            # Features outside the whitelist must not have been touched.
            assert all(delta[i] == 0. for i in range(x_orig.shape[0]) if i not in features_whitelist)
Example #6
0
def test_pipeline_minmaxscaler_softmaxregression():
    """Counterfactuals for a MinMaxScaler -> softmax-regression pipeline.

    Exercises the 'mp' optimizer with and without l1 regularization,
    both with and without a features whitelist.
    """
    # Load data
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    # Create and fit model
    model = make_pipeline(MinMaxScaler(), LogisticRegression(solver='lbfgs', multi_class='multinomial'))
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Shared battery of checks for this model/point.
    compute_counterfactuals_2(model, x_orig, 0)

    for features_whitelist in (None, [0, 1, 2]):
        for regularization in (None, "l1"):
            x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, features_whitelist=features_whitelist, optimizer="mp", regularization=regularization, return_as_dict=False)
            assert y_cf == 0
            assert model.predict(np.array([x_cf])) == 0
Example #7
0
def compute_counterfactuals(model, x, y):
    """Run a battery of counterfactual checks for ``model`` at point ``x``.

    Parameters
    ----------
    model : fitted classifier supported by CEML.
    x : array-like, the data point to explain.
    y : target label the counterfactual must reach.

    Exercises the 'bfgs' and 'nelder-mead' optimizers with and without
    l1 regularization; with a features whitelist, non-whitelisted
    features must remain exactly unchanged.
    """
    features_whitelist = None

    x_cf, y_cf, delta = generate_counterfactual(model, x, y, features_whitelist=features_whitelist, regularization="l1", C=1.0, optimizer="bfgs", return_as_dict=False)
    assert y_cf == y  # FIX: was `y_cf == 0`, which only held for y == 0
    assert model.predict(np.array([x_cf])) == y

    x_cf, y_cf, delta = generate_counterfactual(model, x, y, features_whitelist=features_whitelist, regularization="l1", C=1.0, optimizer="nelder-mead", return_as_dict=False)
    assert y_cf == y
    assert model.predict(np.array([x_cf])) == y

    x_cf, y_cf, delta = generate_counterfactual(model, x, y, features_whitelist=features_whitelist, regularization=None, optimizer="bfgs", return_as_dict=False)
    assert y_cf == y
    assert model.predict(np.array([x_cf])) == y

    x_cf, y_cf, delta = generate_counterfactual(model, x, y, features_whitelist=features_whitelist, regularization=None, optimizer="nelder-mead", return_as_dict=False)
    assert y_cf == y
    assert model.predict(np.array([x_cf])) == y


    features_whitelist = [1, 2]
    x_cf, y_cf, delta = generate_counterfactual(model, x, y, features_whitelist=features_whitelist, regularization="l1", C=1.0, optimizer="bfgs", return_as_dict=False)
    assert y_cf == y
    assert model.predict(np.array([x_cf])) == y
    assert all([True if i in features_whitelist else delta[i] == 0. for i in range(x.shape[0])])

    x_cf, y_cf, delta = generate_counterfactual(model, x, y, features_whitelist=features_whitelist, regularization="l1", C=1.0, optimizer="nelder-mead", return_as_dict=False)
    assert y_cf == y
    assert model.predict(np.array([x_cf])) == y
    assert all([True if i in features_whitelist else delta[i] == 0. for i in range(x.shape[0])])

    features_whitelist = [0, 1, 2]
    # FIX: the two calls below passed the hard-coded target `0` instead of
    # `y` while still asserting `y_cf == y`, so they failed for any y != 0.
    x_cf, y_cf, delta = generate_counterfactual(model, x, y, features_whitelist=features_whitelist, regularization=None, optimizer="bfgs", return_as_dict=False)
    assert y_cf == y
    assert model.predict(np.array([x_cf])) == y
    assert all([True if i in features_whitelist else delta[i] == 0. for i in range(x.shape[0])])

    x_cf, y_cf, delta = generate_counterfactual(model, x, y, features_whitelist=features_whitelist, regularization=None, optimizer="nelder-mead", return_as_dict=False)
    assert y_cf == y
    assert model.predict(np.array([x_cf])) == y
    assert all([True if i in features_whitelist else delta[i] == 0. for i in range(x.shape[0])])
Example #8
0
def test_randomforest_classifier():
    """Counterfactuals for a small RandomForestClassifier on iris.

    Flips the predicted class from 2 to 0 using the gradient-free
    'nelder-mead' optimizer, with and without a features whitelist.
    """
    # Load data.
    # FIX: `load_iris(True)` passed `return_X_y` positionally, which is
    # deprecated and rejected by newer scikit-learn versions; use the
    # keyword form (as the other tests in this file already do).
    X, y = load_iris(return_X_y=True)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=4242)

    # Create and fit model
    model = RandomForestClassifier(n_estimators=10, random_state=42)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # (features_whitelist, extra keyword options) combinations to exercise.
    cases = [
        (None, dict(regularization="l1", C=0.01)),
        (None, dict(regularization=None)),
        ([0, 2], dict(regularization="l1", C=1.0)),
        ([0, 2], dict(regularization=None)),
    ]
    for features_whitelist, opts in cases:
        x_cf, y_cf, delta = generate_counterfactual(
            model,
            x_orig,
            0,
            features_whitelist=features_whitelist,
            optimizer="nelder-mead",
            return_as_dict=False,
            **opts)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        if features_whitelist is not None:
            # Features outside the whitelist must not have been touched.
            assert all(delta[i] == 0. for i in range(x_orig.shape[0]) if i not in features_whitelist)
Example #9
0
def test_glvq():
    """Counterfactual explanations for a GLVQ classifier on iris.

    Drives generate_counterfactual through a table of optimizer /
    regularization / whitelist combinations; whenever a whitelist is
    given, all non-whitelisted features must remain exactly unchanged.
    Finally checks that the LVQ wrapper rejects non-LVQ models.
    """
    # Load data
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    # Create and fit model
    model = GlvqModel(prototypes_per_class=3, max_iter=100, random_state=4242)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # (features_whitelist, generate_counterfactual keyword options)
    cases = [
        (None, dict(regularization="l1", optimizer="mp")),
        (None, dict(regularization="l2", optimizer="mp")),
        (None, dict(regularization="l1", C=1.0, optimizer="bfgs")),
        (None, dict(regularization="l1", C=1.0, optimizer="nelder-mead")),
        (None, dict(regularization=None, optimizer="bfgs")),
        (None, dict(regularization=None, optimizer="nelder-mead")),
        ([0, 1, 2, 3], dict(regularization="l1", optimizer="mp")),
        ([0, 1, 2, 3], dict(regularization="l2", optimizer="mp")),
        ([0, 1, 2, 3], dict(regularization="l1", C=1.0, optimizer="bfgs")),
        ([0, 1, 2, 3], dict(regularization="l1", C=1.0, optimizer="nelder-mead")),
        ([0, 2], dict(regularization=None, optimizer="nelder-mead")),
    ]
    for features_whitelist, opts in cases:
        x_cf, y_cf, delta = generate_counterfactual(
            model,
            x_orig,
            0,
            features_whitelist=features_whitelist,
            return_as_dict=False,
            **opts)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        if features_whitelist is not None:
            # Features outside the whitelist must not have been touched.
            assert all(delta[i] == 0. for i in range(x_orig.shape[0]) if i not in features_whitelist)

    # Other stuff: the wrapper only accepts LVQ models.
    with pytest.raises(TypeError):
        LvqCf(sklearn.linear_model.LogisticRegression())
def test_softmaxregression():
    """Counterfactuals for softmax regression (multinomial logit) on iris.

    Exercises several optimizers (including a custom one and a weighted
    manhattan-distance regularizer) with and without feature whitelists;
    frozen features must remain exactly unchanged.
    """
    # Load data.
    # FIX: `load_iris(True)` passed `return_X_y` positionally, which is
    # deprecated and rejected by newer scikit-learn versions.
    X, y = load_iris(return_X_y=True)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=4242)

    # Create and fit model
    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Create weighted manhattan distance cost function (median absolute deviation)
    md = np.median(X_train, axis=0)
    mad = np.median(np.abs(X_train - md), axis=0)
    regularization_mad = LMadCost(x_orig, mad)

    # (features_whitelist, generate_counterfactual keyword options)
    cases = [
        (None, dict(regularization="l1", C=1.0, optimizer="bfgs")),
        (None, dict(regularization="l1", C=1.0, optimizer=MyOptimizer())),
        (None, dict(regularization=regularization_mad, C=1.0, optimizer="bfgs")),
        (None, dict(regularization="l1", C=1.0, optimizer="cg")),
        (None, dict(regularization="l1", C=1.0, optimizer="nelder-mead")),
        (None, dict(regularization=None, optimizer="bfgs")),
        (None, dict(regularization=None, optimizer="nelder-mead")),
        ([1, 2], dict(regularization="l1", C=1.0, optimizer="bfgs")),
        ([1, 2], dict(regularization="l1", C=1.0, optimizer="nelder-mead")),
        ([0, 1, 2], dict(regularization=None, optimizer="bfgs")),
        ([0, 1, 2], dict(regularization=None, optimizer="nelder-mead")),
    ]
    for features_whitelist, opts in cases:
        x_cf, y_cf, delta = generate_counterfactual(
            model,
            x_orig,
            0,
            features_whitelist=features_whitelist,
            return_as_dict=False,
            **opts)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        if features_whitelist is not None:
            # Features outside the whitelist must not have been touched.
            assert all(delta[i] == 0. for i in range(x_orig.shape[0]) if i not in features_whitelist)
Example #11
0
import cvxpy as cp
from ceml.sklearn import generate_counterfactual
#.......

# Documentation snippet: `model`, `x_orig`, `y_target` and `opt` are
# placeholders that must be supplied by the surrounding application code.
#model = ......
#x_orig = .....
#y_target = .....

# Change optimization parameters
#opt = ....
# Arguments forwarded to the optimizer; `solver` selects the cvxpy backend.
# NOTE(review): `epsilon` is presumably a convergence tolerance and
# `max_iter` an iteration cap — confirm against the CEML optimizer docs.
opt_args = {
    "epsilon": 10.e-4,
    "solver": cp.SCS,
    "solver_verbosity": False,
    "max_iter": 200
}

# Compute counterfactual explanations
x_cf, y_cf, delta = generate_counterfactual(model,
                                            x_orig,
                                            y_target,
                                            features_whitelist=None,
                                            C=0.1,
                                            regularization="l1",
                                            optimizer=opt,
                                            optimizer_args=opt_args,
                                            return_as_dict=False)
        self.f_grad = f_grad
        self.x0 = x0
        self.tol = tol
        self.max_iter = max_iter

    def is_grad_based(self):
        """Signal that this optimizer consumes gradient information
        (``__call__`` passes ``self.f_grad`` as the Jacobian)."""
        return True
    
    def __call__(self):
        """Minimize ``self.f`` with BFGS starting from ``self.x0``.

        Uses the gradient ``self.f_grad``, tolerance ``self.tol`` and
        iteration cap ``self.max_iter``; returns the optimum as a numpy
        array (CEML expects the bare solution vector).
        """
        optimum = minimize(fun=self.f, x0=self.x0, jac=self.f_grad, tol=self.tol, options={'maxiter': self.max_iter}, method="BFGS")
        return np.array(optimum["x"])


# Demo: explain a softmax-regression prediction on iris using the custom
# optimizer defined above.
if __name__ == "__main__":
    # Load data
    X, y = load_iris(return_X_y=True)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    # Create and fit model
    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x = X_test[1,:]
    print("Prediction on x: {0}".format(model.predict([x])))

    # Compute counterfactual by using our custom optimizer 'MyOptimizer'
    print("\nCompute counterfactual ....")
    print(generate_counterfactual(model, x, y_target=0, optimizer=MyOptimizer(), features_whitelist=None, regularization="l1", C=0.5))
Example #13
0
def test_softmaxregression():
    """End-to-end counterfactual tests for softmax regression on iris.

    Covers the default call, dict-style returns, a table of optimizer /
    regularization / whitelist combinations on the multiclass problem,
    and a genuine binary problem, plus wrapper input validation.
    """
    # Load data
    X, y = load_iris(return_X_y=True)

    # FIX: the original built a "binary" subset here (`idx = y > 1`,
    # `X_, y_ = X[idx,:], y[idx]`) that was never used — the split below
    # always ran on the full data (and the assertions require class 2).
    # The dead assignments were removed; the real binary case is tested
    # further down with `idx = y != 2`.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')   # Create and fit model
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0,:]   # Select data point for explaining its prediction
    assert model.predict([x_orig]) == 2

    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, return_as_dict=False) # Compute counterfactual explanation
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    cf = generate_counterfactual(model, x_orig, 0, return_as_dict=True) # Compute counterfactual explanation
    assert cf["y_cf"] == 0
    assert model.predict(np.array([cf["x_cf"]])) == 0

    # Multiclass classification problem: refit from scratch on the same split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    # Create and fit model
    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0,:]
    assert model.predict([x_orig]) == 2

    # Create weighted manhattan distance cost function (median absolute deviation)
    md = np.median(X_train, axis=0)
    mad = np.median(np.abs(X_train - md), axis=0)
    regularization_mad = LMadCost(x_orig, mad)

    # Compute counterfactual
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    cf = generate_counterfactual(model, x_orig, 0, return_as_dict=True)
    assert cf["y_cf"] == 0
    assert model.predict(np.array([cf["x_cf"]])) == 0

    # (features_whitelist, keyword options, tolerance for frozen features;
    #  tolerance None -> no whitelist check, 0. -> exact, 1e-5 -> numeric)
    cases = [
        (None, dict(regularization="l1", optimizer="mp"), None),
        (None, dict(regularization="l2", optimizer="mp"), None),
        (None, dict(regularization="l1", C=0.1, optimizer="bfgs"), None),
        (None, dict(regularization="l1", C=0.1, optimizer=MyOptimizer()), None),
        (None, dict(regularization=regularization_mad, C=0.1, optimizer="bfgs"), None),
        (None, dict(regularization="l1", C=0.1, optimizer="cg"), None),
        (None, dict(regularization="l1", C=1.0, optimizer="nelder-mead"), None),
        (None, dict(regularization=None, optimizer="bfgs"), None),
        (None, dict(regularization=None, optimizer="nelder-mead"), None),
        ([1, 2], dict(regularization="l1", optimizer="mp"), 1e-5),
        ([1, 2], dict(regularization="l2", optimizer="mp"), 1e-5),
        ([1, 2], dict(regularization="l1", C=1.0, optimizer="bfgs"), 0.),
        ([1, 2], dict(regularization="l1", C=1.0, optimizer="nelder-mead"), 0.),
        ([0, 1, 2], dict(regularization=None, optimizer="bfgs"), 0.),
        ([0, 1, 2], dict(regularization=None, optimizer="nelder-mead"), 0.),
    ]
    for features_whitelist, opts, tol in cases:
        x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, features_whitelist=features_whitelist, return_as_dict=False, **opts)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        if features_whitelist is not None:
            # Frozen features stay unchanged — exactly for the exact
            # optimizers, numerically (<= 1e-5) for the 'mp' optimizer.
            assert all((delta[i] <= tol) if tol else (delta[i] == 0.)
                       for i in range(x_orig.shape[0]) if i not in features_whitelist)

    # Test binary case
    X, y = load_iris(return_X_y=True)
    idx = y != 2
    X, y = X[idx, :], y[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0,:]
    assert model.predict([x_orig]) == 0

    features_whitelist = None

    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 1, features_whitelist=features_whitelist, regularization="l1", optimizer="mp", return_as_dict=False)
    assert y_cf == 1
    assert model.predict(np.array([x_cf])) == 1

    x_orig = X_test[0,:]
    assert model.predict([x_orig]) == 1

    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, features_whitelist=features_whitelist, regularization="l1", optimizer="mp", return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    # Other stuff: wrapper input validation.
    from ceml.sklearn import SoftmaxCounterfactual
    with pytest.raises(TypeError):
        SoftmaxCounterfactual(sklearn.linear_model.LinearRegression())

    with pytest.raises(ValueError):
        SoftmaxCounterfactual(LogisticRegression(multi_class="ovr"))
Example #14
0
def test_knn_regressor():
    """Counterfactual explanations for a k-NN regressor.

    Checks that ``generate_counterfactual`` reaches the target band for
    every combination of feature whitelist, regularization and optimizer,
    and that features outside the whitelist are left unchanged.
    """
    # Load data. Use the keyword form: the positional boolean
    # (``load_boston(True)``) is deprecated in scikit-learn and the rest of
    # this file consistently uses ``return_X_y=True``.
    X, y = load_boston(return_X_y=True)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=4242)

    # Create and fit model
    model = KNeighborsRegressor(n_neighbors=3)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    y_orig_pred = model.predict([x_orig])
    assert y_orig_pred >= 20. and y_orig_pred <= 21.

    # Target: any prediction within 2.0 of 25. counts as "done", since an
    # exact regression target is generally not reachable.
    y_target = 25.
    y_target_done = lambda z: np.abs(z - y_target) < 2.

    # Exercise every (whitelist, regularization, optimizer) combination that
    # the original hand-unrolled version covered.
    for features_whitelist in [None, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]:
        for regularization in ["l1", None]:
            for optimizer in ["bfgs", "nelder-mead"]:
                # C is only meaningful together with a regularization term.
                extra_kwargs = {"C": 1.0} if regularization == "l1" else {}
                x_cf, y_cf, delta = generate_counterfactual(
                    model,
                    x_orig,
                    y_target,
                    done=y_target_done,
                    features_whitelist=features_whitelist,
                    regularization=regularization,
                    optimizer=optimizer,
                    return_as_dict=False,
                    **extra_kwargs)
                assert y_target_done(y_cf)
                assert y_target_done(model.predict(np.array([x_cf])))
                if features_whitelist is not None:
                    # Features outside the whitelist must not be changed.
                    assert all(i in features_whitelist or delta[i] == 0.
                               for i in range(x_orig.shape[0]))
if __name__ == "__main__":
    # Example: counterfactual explanation with a custom regularization term.
    # Load data
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=4242)

    # Whitelist of features - list of features we can change/use when computing a counterfactual
    features_whitelist = None  # All features can be used.

    # Create and fit the model
    model = GaussianNB()  # NOTE(review): the previous "ceml requires: multi_class='multinomial'" comment was a copy-paste from a LogisticRegression example; GaussianNB has no such parameter.
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x = X_test[1, :]
    print("Prediction on x: {0}".format(model.predict([x])))

    # Create custom regularization function
    # (MyRegularization is presumably defined elsewhere in this file — not visible here.)
    regularization = MyRegularization(x)

    # Compute counterfactual using the custom regularization instead of the
    # built-in "l1"/"l2" options.
    print("\nCompute counterfactual ....")
    print(
        generate_counterfactual(model,
                                x,
                                y_target=0,
                                features_whitelist=features_whitelist,
                                regularization=regularization,
                                optimizer="bfgs"))
Example #16
0
def test_qda():
    """Counterfactual explanations for Quadratic Discriminant Analysis.

    Covers the multiclass and binary iris problems with several optimizers,
    regularizers and feature whitelists, the dict return format, and the
    error handling for unsupported / unfitted models.
    """
    # Fit a QDA model; store_covariance=True is needed so that the fitted
    # model exposes the covariance matrices ceml works with.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    model = QuadraticDiscriminantAnalysis(store_covariance=True)
    model.fit(X_train, y_train)

    # Data point whose prediction we want to explain.
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    def run_cf(y_target, **cf_params):
        # Compute a counterfactual and check that it flips the prediction.
        # Reads `model` and `x_orig` from the enclosing scope at call time,
        # so rebinding them below (binary case) is picked up automatically.
        x_cf, y_cf, delta = generate_counterfactual(model, x_orig, y_target,
                                                    return_as_dict=False, **cf_params)
        assert y_cf == y_target
        assert model.predict(np.array([x_cf])) == y_target
        return delta

    def assert_frozen(delta, whitelist, tol=None):
        # Features outside the whitelist must stay unchanged — exactly zero,
        # or within a small numeric tolerance for the "mp" optimizer.
        for i in range(x_orig.shape[0]):
            if i in whitelist:
                continue
            if tol is None:
                assert delta[i] == 0.
            else:
                assert delta[i] <= tol

    # --- Multiclass, all features may change ---
    run_cf(0, features_whitelist=None, regularization="l1", optimizer="mp")

    cf = generate_counterfactual(model, x_orig, 0, features_whitelist=None,
                                 regularization="l1", optimizer="mp", return_as_dict=True)
    assert cf["y_cf"] == 0
    assert model.predict(np.array([cf["x_cf"]])) == 0

    run_cf(0, features_whitelist=None, regularization="l2", optimizer="mp")
    run_cf(0, features_whitelist=None, regularization="l1", C=1.0, optimizer="bfgs")
    run_cf(0, features_whitelist=None, regularization="l1", C=1.0, optimizer="nelder-mead")
    run_cf(0, features_whitelist=None, regularization=None, optimizer="bfgs")
    run_cf(0, features_whitelist=None, regularization=None, optimizer="nelder-mead")

    # --- Multiclass, restricted feature whitelist ---
    whitelist = [0, 1, 2]
    assert_frozen(run_cf(0, features_whitelist=whitelist, regularization="l1", optimizer="mp"), whitelist, tol=1e-5)
    assert_frozen(run_cf(0, features_whitelist=whitelist, regularization="l2", optimizer="mp"), whitelist, tol=1e-3)
    assert_frozen(run_cf(0, features_whitelist=whitelist, regularization="l1", C=0.1, optimizer="bfgs"), whitelist)
    assert_frozen(run_cf(0, features_whitelist=whitelist, regularization="l1", C=1.0, optimizer="nelder-mead"), whitelist)
    assert_frozen(run_cf(0, features_whitelist=whitelist, regularization=None, optimizer="bfgs"), whitelist)
    assert_frozen(run_cf(0, features_whitelist=whitelist, regularization=None, optimizer="nelder-mead"), whitelist)

    # --- Binary case: keep only classes 0 and 1 ---
    X, y = load_iris(return_X_y=True)
    keep = y != 2
    X, y = X[keep, :], y[keep]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    model = QuadraticDiscriminantAnalysis(store_covariance=True)
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 0

    run_cf(1, features_whitelist=None, optimizer="mp")

    cf = generate_counterfactual(model, x_orig, y_target=1, features_whitelist=None,
                                 optimizer="mp", return_as_dict=True)
    assert cf["y_cf"] == 1
    assert model.predict(np.array([cf["x_cf"]])) == 1

    x_orig = X_test[0, :]
    assert model.predict([x_orig]) == 1

    run_cf(0, features_whitelist=None, optimizer="mp")

    # --- Error handling ---
    from ceml.sklearn import QdaCounterfactual
    with pytest.raises(TypeError):
        QdaCounterfactual(sklearn.linear_model.LogisticRegression())

    # Without store_covariance=True the fitted model lacks the covariance
    # attribute ceml needs, so wrapping or explaining it must fail.
    model = QuadraticDiscriminantAnalysis()
    model.fit(X_train, y_train)
    with pytest.raises(AttributeError):
        QdaCounterfactual(model)
    with pytest.raises(AttributeError):
        generate_counterfactual(model, x_orig, 0)
Example #17
0
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

from ceml.sklearn import generate_counterfactual

if __name__ == "__main__":
    # Example: counterfactual explanation for a decision-tree classifier.
    # Load data. Use the keyword form: the positional boolean
    # (``load_iris(True)``) is deprecated in scikit-learn and the rest of
    # this file consistently uses ``return_X_y=True``.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=4242)

    # Whitelist of features - list of features we can change/use when computing a counterfactual
    features_whitelist = None  # We can use all features

    # Create and fit model
    model = DecisionTreeClassifier(max_depth=3)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x = X_test[1, :]
    print("Prediction on x: {0}".format(model.predict([x])))

    # Compute counterfactual pushing the prediction to class 0
    print("\nCompute counterfactual ....")
    print(
        generate_counterfactual(model,
                                x,
                                y_target=0,
                                features_whitelist=features_whitelist))
Example #18
0
def test_lgmlvq_classwise():
    # Load data
    X, y = load_iris(True)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=4242)

    # Create and fit model
    model = LgmlvqModel(prototypes_per_class=3,
                        classwise=True,
                        max_iter=100,
                        random_state=4242)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Compute counterfactual
    features_whitelist = None

    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization="l1",
        C=1.0,
        optimizer="bfgs",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization="l1",
        C=1.0,
        optimizer="nelder-mead",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    features_whitelist = [0, 1, 2, 3]

    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization="l1",
        C=1.0,
        optimizer="bfgs",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    assert all([
        True if i in features_whitelist else delta[i] == 0.
        for i in range(x_orig.shape[0])
    ])

    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization="l1",
        C=1.0,
        optimizer="nelder-mead",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    assert all([
        True if i in features_whitelist else delta[i] == 0.
        for i in range(x_orig.shape[0])
    ])
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=4242)

    # Whitelist of features - list of features we can change/use when computing a counterfactual
    features_whitelist = [0, 1, 2, 3, 4]  # Use the first five features only

    # Create and fit model
    model = Ridge()
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x = X_test[1, :]
    print("Prediction on x: {0}".format(model.predict([x])))

    # Compute counterfactual
    print("\nCompute counterfactual ....")
    y_target = 25.0
    done = lambda z: np.abs(
        y_target - z
    ) <= 0.5  # Since we might not be able to achieve `y_target` exactly, we tell ceml that we are happy if we do not deviate more than 0.5 from it.
    print(
        generate_counterfactual(model,
                                x,
                                y_target=y_target,
                                features_whitelist=features_whitelist,
                                C=1.0,
                                regularization="l2",
                                optimizer="bfgs",
                                done=done))