def test_isolationforest():
    """Counterfactual generation for an IsolationForest anomaly detector."""
    # Inlier cluster around the origin plus uniformly scattered points,
    # many of which fall far enough away to be flagged as anomalies.
    X, _ = make_blobs(n_samples=400, centers=[[0, 0], [0, 0]], cluster_std=0.5,
                      n_features=2, random_state=42)
    X_outlier = np.random.RandomState(42).uniform(low=-6, high=6, size=(50, 2))

    model = IsolationForest(random_state=42)
    model.fit(X)

    # Flip an inlier (+1) to an anomaly (-1) and an anomaly back to an inlier.
    for x, y_target, y_orig in ((X[0, :], -1, 1), (X_outlier[1, :], 1, -1)):
        assert model.predict([x]) == y_orig
        x_cf, y_cf, _ = generate_counterfactual(model, x, y_target=y_target,
                                                return_as_dict=False)
        assert y_cf == y_target
        assert model.predict(np.array([x_cf])) == y_target
def test_pipeline_pca_linearregression():
    """Counterfactuals for a PCA + Lasso regression pipeline on Boston housing."""
    # NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
    # pinned scikit-learn version still ships it.
    X, y = load_boston(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    # Dimensionality reduction followed by a sparse linear model.
    model = make_pipeline(PCA(n_components=4), Lasso())
    model.fit(X_train, y_train)

    # Query point with a known prediction range.
    x_orig = X_test[1:4][0, :]
    y_orig_pred = model.predict([x_orig])
    assert y_orig_pred >= 25 and y_orig_pred < 26

    # A counterfactual is accepted once its prediction is within 3 of 20.
    y_target = 20.
    y_target_done = lambda z: np.abs(z - y_target) < 3.

    for kwargs in ({"regularization": "l1", "C": 0.1, "optimizer": "bfgs"},
                   {"regularization": "l1", "optimizer": "mp"},
                   {"regularization": "l2", "optimizer": "mp"}):
        x_cf, y_cf, _ = generate_counterfactual(model, x_orig, y_target=y_target,
                                                done=y_target_done,
                                                features_whitelist=None,
                                                return_as_dict=False, **kwargs)
        assert y_target_done(y_cf)
        assert y_target_done(model.predict(np.array([x_cf])))
def test_isolationforest():
    """IsolationForest counterfactuals, including the dict return format and
    the model-wrapper type check."""
    X, _ = make_blobs(n_samples=400, centers=[[0, 0], [0, 0]], cluster_std=0.5,
                      n_features=2, random_state=42)
    X_outlier = np.random.RandomState(42).uniform(low=-6, high=6, size=(50, 2))

    model = IsolationForest(random_state=42)
    model.fit(X)

    # Inlier flipped to an anomaly.
    x = X[0, :]
    y_target = -1
    assert model.predict([x]) == 1
    x_cf, y_cf, _ = generate_counterfactual(model, x, y_target=y_target,
                                            return_as_dict=False)
    assert y_cf == y_target
    assert model.predict(np.array([x_cf])) == y_target

    # Anomaly flipped back to an inlier.
    x = X_outlier[1, :]
    y_target = 1
    assert model.predict([x]) == -1
    x_cf, y_cf, _ = generate_counterfactual(model, x, y_target=y_target,
                                            return_as_dict=False)
    assert y_cf == y_target
    assert model.predict(np.array([x_cf])) == y_target

    # Same query, dict-style result.
    cf = generate_counterfactual(model, x, y_target=y_target, return_as_dict=True)
    assert cf["y_cf"] == y_target
    assert model.predict(np.array([cf["x_cf"]])) == y_target

    # The ceml wrapper must agree with the raw model and reject foreign models.
    from ceml.sklearn import IsolationForest as IsolationForestCf
    model_cf = IsolationForestCf(model)
    assert model.predict([x]) == model_cf.predict(x)
    with pytest.raises(TypeError):
        IsolationForestCf(sklearn.linear_model.LogisticRegression())
def test_decisiontree_regressor():
    """Counterfactuals for a shallow DecisionTreeRegressor on Boston housing."""
    X, y = load_boston(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    model = DecisionTreeRegressor(max_depth=3, random_state=42)
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0, :]
    y_orig_pred = model.predict([x_orig])
    assert y_orig_pred >= 19. and y_orig_pred < 21.

    # Accept any counterfactual whose prediction is within 1 of the target.
    y_target = 25.
    y_target_done = lambda z: np.abs(z - y_target) < 1.

    # Unrestricted: every feature may change. The predicate is passed as the
    # target argument (the third positional argument accepts a callable here).
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, y_target_done,
                                                features_whitelist=None,
                                                regularization="l1",
                                                return_as_dict=False)
    assert y_target_done(y_cf)
    assert y_target_done(model.predict(np.array([x_cf])))

    # Restricted whitelist: features outside it must stay untouched.
    features_whitelist = [0, 2, 4, 5, 7, 8, 9, 12]
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, y_target_done,
                                                features_whitelist=features_whitelist,
                                                regularization="l1",
                                                return_as_dict=False)
    assert y_target_done(y_cf)
    assert y_target_done(model.predict(np.array([x_cf])))
    assert all(delta[i] == 0. for i in range(x_orig.shape[0])
               if i not in features_whitelist)
def test_decisiontree_classifier():
    """Counterfactuals for a shallow DecisionTreeClassifier on iris."""
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    model = DecisionTreeClassifier(max_depth=3, random_state=42)
    model.fit(X_train, y_train)

    # Query point known to be classified as class 2.
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Unrestricted counterfactual to class 0.
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                features_whitelist=None,
                                                regularization="l1",
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    # Only features 0 and 2 may change; all others must keep delta == 0.
    features_whitelist = [0, 2]
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                features_whitelist=features_whitelist,
                                                regularization="l1",
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    assert all(delta[i] == 0. for i in range(x_orig.shape[0])
               if i not in features_whitelist)
def test_pipeline_minmaxscaler_softmaxregression():
    """Counterfactuals for a MinMaxScaler + softmax-regression pipeline on iris."""
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    model = make_pipeline(MinMaxScaler(),
                          LogisticRegression(solver='lbfgs',
                                             multi_class='multinomial'))
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Shared battery of optimizer/regularization combinations.
    compute_counterfactuals_2(model, x_orig, 0)

    # Mathematical-programming optimizer, with and without a feature whitelist.
    for whitelist in (None, [0, 1, 2]):
        for reg in (None, "l1"):
            x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                        features_whitelist=whitelist,
                                                        optimizer="mp",
                                                        regularization=reg,
                                                        return_as_dict=False)
            assert y_cf == 0
            assert model.predict(np.array([x_cf])) == 0
def compute_counterfactuals(model, x, y):
    """Exercise generate_counterfactual() on a battery of optimizer /
    regularization / feature-whitelist combinations.

    Parameters
    ----------
    model : fitted classifier accepted by ceml's generate_counterfactual
    x : np.ndarray
        Input whose prediction should be flipped.
    y : target label the counterfactual must reach.

    Every combination must yield a counterfactual the model classifies as `y`,
    and features outside the whitelist must remain unchanged.
    """
    def check(features_whitelist, **kwargs):
        # One counterfactual query followed by the standard assertions.
        x_cf, y_cf, delta = generate_counterfactual(model, x, y,
                                                    features_whitelist=features_whitelist,
                                                    return_as_dict=False, **kwargs)
        assert y_cf == y
        assert model.predict(np.array([x_cf])) == y
        if features_whitelist is not None:
            # Non-whitelisted features must not have been altered.
            assert all(delta[i] == 0. for i in range(x.shape[0])
                       if i not in features_whitelist)

    # All features free to change.
    # BUGFIX: the first case previously asserted `y_cf == 0` instead of `y`.
    check(None, regularization="l1", C=1.0, optimizer="bfgs")
    check(None, regularization="l1", C=1.0, optimizer="nelder-mead")
    check(None, regularization=None, optimizer="bfgs")
    check(None, regularization=None, optimizer="nelder-mead")

    # Restricted whitelists.
    check([1, 2], regularization="l1", C=1.0, optimizer="bfgs")
    check([1, 2], regularization="l1", C=1.0, optimizer="nelder-mead")
    # BUGFIX: these two calls previously requested target class 0 (hard-coded)
    # while asserting the result against `y`; both now consistently use `y`.
    check([0, 1, 2], regularization=None, optimizer="bfgs")
    check([0, 1, 2], regularization=None, optimizer="nelder-mead")
def test_randomforest_classifier():
    """Counterfactuals for a RandomForestClassifier on iris."""
    # BUGFIX: pass return_X_y by keyword — the positional form was deprecated
    # in scikit-learn 0.23 and is a TypeError in recent releases.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    model = RandomForestClassifier(n_estimators=10, random_state=42)
    model.fit(X_train, y_train)

    # Query point known to be classified as class 2.
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    def check(features_whitelist, **kwargs):
        # One counterfactual query to class 0 plus the standard assertions.
        x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                    features_whitelist=features_whitelist,
                                                    optimizer="nelder-mead",
                                                    return_as_dict=False, **kwargs)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        if features_whitelist is not None:
            # Non-whitelisted features must not have been altered.
            assert all(delta[i] == 0. for i in range(x_orig.shape[0])
                       if i not in features_whitelist)

    # All features free to change.
    check(None, regularization="l1", C=0.01)
    check(None, regularization=None)

    # Only features 0 and 2 may change.
    check([0, 2], regularization="l1", C=1.0)
    check([0, 2], regularization=None)
def test_glvq():
    """Counterfactuals for a GLVQ model on iris across many optimizer /
    regularization / whitelist combinations."""
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    model = GlvqModel(prototypes_per_class=3, max_iter=100, random_state=4242)
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    def run(whitelist, **kwargs):
        # One counterfactual query to class 0; returns delta for inspection.
        x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                    features_whitelist=whitelist,
                                                    return_as_dict=False, **kwargs)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        return delta

    def whitelist_respected(whitelist, delta):
        # Features outside the whitelist must not have been altered.
        assert all(delta[i] == 0. for i in range(x_orig.shape[0])
                   if i not in whitelist)

    # All features free to change.
    run(None, regularization="l1", optimizer="mp")
    run(None, regularization="l2", optimizer="mp")
    run(None, regularization="l1", C=1.0, optimizer="bfgs")
    run(None, regularization="l1", C=1.0, optimizer="nelder-mead")
    run(None, regularization=None, optimizer="bfgs")
    run(None, regularization=None, optimizer="nelder-mead")

    # Full whitelist (all four iris features).
    wl = [0, 1, 2, 3]
    whitelist_respected(wl, run(wl, regularization="l1", optimizer="mp"))
    whitelist_respected(wl, run(wl, regularization="l2", optimizer="mp"))
    whitelist_respected(wl, run(wl, regularization="l1", C=1.0, optimizer="bfgs"))
    whitelist_respected(wl, run(wl, regularization="l1", C=1.0, optimizer="nelder-mead"))

    # Restricted whitelist.
    wl = [0, 2]
    whitelist_respected(wl, run(wl, regularization=None, optimizer="nelder-mead"))

    # Wrapping a non-LVQ model must raise.
    with pytest.raises(TypeError):
        LvqCf(sklearn.linear_model.LogisticRegression())
def test_softmaxregression():
    """Counterfactuals for softmax (multinomial logistic) regression on iris."""
    # BUGFIX: pass return_X_y by keyword — the positional form was deprecated
    # in scikit-learn 0.23 and removed in later releases.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    # Query point known to be classified as class 2.
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Weighted Manhattan distance cost based on median absolute deviation.
    md = np.median(X_train, axis=0)
    mad = np.median(np.abs(X_train - md), axis=0)
    regularization_mad = LMadCost(x_orig, mad)

    def check(features_whitelist, **kwargs):
        # One counterfactual query to class 0 plus the standard assertions.
        x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                    features_whitelist=features_whitelist,
                                                    return_as_dict=False, **kwargs)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        if features_whitelist is not None:
            # Non-whitelisted features must not have been altered.
            assert all(delta[i] == 0. for i in range(x_orig.shape[0])
                       if i not in features_whitelist)

    # Every supported optimizer / regularization, no whitelist.
    check(None, regularization="l1", C=1.0, optimizer="bfgs")
    check(None, regularization="l1", C=1.0, optimizer=MyOptimizer())
    check(None, regularization=regularization_mad, C=1.0, optimizer="bfgs")
    check(None, regularization="l1", C=1.0, optimizer="cg")
    check(None, regularization="l1", C=1.0, optimizer="nelder-mead")
    check(None, regularization=None, optimizer="bfgs")
    check(None, regularization=None, optimizer="nelder-mead")

    # Restricted whitelists.
    check([1, 2], regularization="l1", C=1.0, optimizer="bfgs")
    check([1, 2], regularization="l1", C=1.0, optimizer="nelder-mead")
    check([0, 1, 2], regularization=None, optimizer="bfgs")
    check([0, 1, 2], regularization=None, optimizer="nelder-mead")
import cvxpy as cp
from ceml.sklearn import generate_counterfactual

# Documentation snippet: a fitted `model`, a query point `x_orig`, a target
# label `y_target` and an optimizer `opt` are assumed to be defined by the
# surrounding text — this fragment is not runnable on its own.
#model = ......
#x_orig = .....
#y_target = .....

# Change optimization parameters passed through to the optimizer backend.
#opt = ....
opt_args = {
    "epsilon": 10.e-4,          # convergence tolerance
    "solver": cp.SCS,           # cvxpy backend solver to use
    "solver_verbosity": False,  # suppress solver output
    "max_iter": 200             # iteration budget
}

# Compute counterfactual explanations as an (x_cf, y_cf, delta) tuple.
x_cf, y_cf, delta = generate_counterfactual(model, x_orig, y_target,
                                            features_whitelist=None, C=0.1,
                                            regularization="l1", optimizer=opt,
                                            optimizer_args=opt_args,
                                            return_as_dict=False)
# NOTE(review): fragment — the assignments below are the tail of a custom
# optimizer class' __init__ (the `class` statement, the `def __init__(...)`
# header and the `self.f` assignment it presumably contains are not part of
# this chunk); indentation reconstructed as-is. Confirm against the full file.
self.f_grad = f_grad      # gradient of the objective function
self.x0 = x0              # starting point for the minimization
self.tol = tol            # convergence tolerance forwarded to scipy
self.max_iter = max_iter  # iteration budget for BFGS

def is_grad_based(self):
    """Tell ceml that this optimizer consumes gradient information."""
    return True

def __call__(self):
    """Run BFGS via scipy.optimize.minimize and return the optimum as an array.

    Uses `self.f` as the objective — assumed to be set in the (unseen) part
    of __init__ — with `self.f_grad` as its Jacobian.
    """
    optimum = minimize(fun=self.f, x0=self.x0, jac=self.f_grad, tol=self.tol,
                       options={'maxiter': self.max_iter}, method="BFGS")
    return np.array(optimum["x"])

if __name__ == "__main__":
    # Load data
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    # Create and fit model
    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x = X_test[1, :]
    print("Prediction on x: {0}".format(model.predict([x])))

    # Compute counterfactual by using our custom optimizer 'MyOptimizer'
    print("\nCompute counterfactual ....")
    print(generate_counterfactual(model, x, y_target=0, optimizer=MyOptimizer(),
                                  features_whitelist=None, regularization="l1",
                                  C=0.5))
def test_softmaxregression():
    """End-to-end counterfactual tests for softmax regression on iris,
    covering both return formats, many optimizers, feature whitelists and a
    genuine binary classification problem."""
    X, y = load_iris(return_X_y=True)

    # BUGFIX: removed the dead statements `idx = y > 1; X_, y_ = X[idx, :], y[idx]`.
    # They were commented as building a "binary problem" but the subset was
    # never used afterwards — and `y > 1` selects a single class anyway. The
    # real binary test (mask `y != 2`) is at the bottom of this function.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)
    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Default settings, both return formats.
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    cf = generate_counterfactual(model, x_orig, 0, return_as_dict=True)
    assert cf["y_cf"] == 0
    assert model.predict(np.array([cf["x_cf"]])) == 0

    # Multiclass classification problem (fresh split and model).
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)
    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Weighted Manhattan distance cost (median absolute deviation).
    md = np.median(X_train, axis=0)
    mad = np.median(np.abs(X_train - md), axis=0)
    regularization_mad = LMadCost(x_orig, mad)

    def check(whitelist, tol=None, **kwargs):
        # One counterfactual query to class 0. `tol` enables an approximate
        # whitelist check — the "mp" optimizer leaves tiny numerical residue
        # on frozen features, the others must leave them exactly unchanged.
        x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                    features_whitelist=whitelist,
                                                    return_as_dict=False, **kwargs)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        if whitelist is not None:
            bound = 0. if tol is None else tol
            if tol is None:
                assert all(delta[i] == bound for i in range(x_orig.shape[0])
                           if i not in whitelist)
            else:
                assert all(delta[i] <= bound for i in range(x_orig.shape[0])
                           if i not in whitelist)

    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    cf = generate_counterfactual(model, x_orig, 0, return_as_dict=True)
    assert cf["y_cf"] == 0
    assert model.predict(np.array([cf["x_cf"]])) == 0

    # Every supported optimizer / regularization, no whitelist.
    check(None, regularization="l1", optimizer="mp")
    check(None, regularization="l2", optimizer="mp")
    check(None, regularization="l1", C=0.1, optimizer="bfgs")
    check(None, regularization="l1", C=0.1, optimizer=MyOptimizer())
    check(None, regularization=regularization_mad, C=0.1, optimizer="bfgs")
    check(None, regularization="l1", C=0.1, optimizer="cg")
    check(None, regularization="l1", C=1.0, optimizer="nelder-mead")
    check(None, regularization=None, optimizer="bfgs")
    check(None, regularization=None, optimizer="nelder-mead")

    # Restricted whitelists.
    check([1, 2], tol=1e-5, regularization="l1", optimizer="mp")
    check([1, 2], tol=1e-5, regularization="l2", optimizer="mp")
    check([1, 2], regularization="l1", C=1.0, optimizer="bfgs")
    check([1, 2], regularization="l1", C=1.0, optimizer="nelder-mead")
    check([0, 1, 2], regularization=None, optimizer="bfgs")
    check([0, 1, 2], regularization=None, optimizer="nelder-mead")

    # Genuine binary problem: drop class 2 entirely.
    X, y = load_iris(return_X_y=True)
    idx = y != 2
    X, y = X[idx, :], y[idx]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)
    model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 0
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 1,
                                                features_whitelist=None,
                                                regularization="l1",
                                                optimizer="mp",
                                                return_as_dict=False)
    assert y_cf == 1
    assert model.predict(np.array([x_cf])) == 1

    x_orig = X_test[0, :]
    assert model.predict([x_orig]) == 1
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                features_whitelist=None,
                                                regularization="l1",
                                                optimizer="mp",
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    # Wrapper validation: wrong model type and wrong multi_class setting.
    from ceml.sklearn import SoftmaxCounterfactual
    with pytest.raises(TypeError):
        SoftmaxCounterfactual(sklearn.linear_model.LinearRegression())
    with pytest.raises(ValueError):
        SoftmaxCounterfactual(LogisticRegression(multi_class="ovr"))
def test_knn_regressor():
    """Counterfactuals for a k-nearest-neighbors regressor on Boston housing."""
    # BUGFIX: pass return_X_y by keyword — the positional form was deprecated
    # in scikit-learn 0.23 and later removed.
    X, y = load_boston(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    model = KNeighborsRegressor(n_neighbors=3)
    model.fit(X_train, y_train)

    # Query point with a known prediction range.
    x_orig = X_test[1:4][0, :]
    y_orig_pred = model.predict([x_orig])
    assert y_orig_pred >= 20. and y_orig_pred <= 21.

    # Accept any counterfactual whose prediction is within 2 of the target.
    y_target = 25.
    y_target_done = lambda z: np.abs(z - y_target) < 2.

    def check(features_whitelist, **kwargs):
        # One counterfactual query plus the standard assertions.
        x_cf, y_cf, delta = generate_counterfactual(model, x_orig, y_target,
                                                    done=y_target_done,
                                                    features_whitelist=features_whitelist,
                                                    return_as_dict=False, **kwargs)
        assert y_target_done(y_cf)
        assert y_target_done(model.predict(np.array([x_cf])))
        if features_whitelist is not None:
            # Non-whitelisted features must not have been altered.
            assert all(delta[i] == 0. for i in range(x_orig.shape[0])
                       if i not in features_whitelist)

    # First unrestricted, then restricted to the first ten features.
    for wl in (None, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]):
        check(wl, regularization="l1", C=1.0, optimizer="bfgs")
        check(wl, regularization="l1", C=1.0, optimizer="nelder-mead")
        check(wl, regularization=None, optimizer="bfgs")
        check(wl, regularization=None, optimizer="nelder-mead")
if __name__ == "__main__":
    # Load the iris data and split it into training and test sets.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                        random_state=4242)

    # None means every feature may be changed when computing a counterfactual.
    features_whitelist = None

    # Fit a Gaussian naive Bayes classifier.
    model = GaussianNB()
    model.fit(X_train, y_train)

    # Pick the data point whose prediction should be explained.
    x = X_test[1, :]
    print("Prediction on x: {0}".format(model.predict([x])))

    # Custom regularization built around the query point.
    regularization = MyRegularization(x)

    # Compute and print the counterfactual explanation.
    print("\nCompute counterfactual ....")
    print(generate_counterfactual(model, x, y_target=0,
                                  features_whitelist=features_whitelist,
                                  regularization=regularization,
                                  optimizer="bfgs"))
def test_qda():
    """Counterfactual generation for a QDA classifier on the iris data."""
    # Data and model.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    model = QuadraticDiscriminantAnalysis(store_covariance=True)
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Unrestricted search: every regularization/optimizer combination must
    # flip the prediction to the target class 0.
    for kwargs in (dict(regularization="l1", optimizer="mp"),
                   dict(regularization="l2", optimizer="mp"),
                   dict(regularization="l1", C=1.0, optimizer="bfgs"),
                   dict(regularization="l1", C=1.0, optimizer="nelder-mead"),
                   dict(regularization=None, optimizer="bfgs"),
                   dict(regularization=None, optimizer="nelder-mead")):
        x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, features_whitelist=None,
                                                    return_as_dict=False, **kwargs)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0

    # Dict-shaped result variant.
    cf = generate_counterfactual(model, x_orig, 0, features_whitelist=None,
                                 regularization="l1", optimizer="mp", return_as_dict=True)
    assert cf["y_cf"] == 0
    assert model.predict(np.array([cf["x_cf"]])) == 0

    # Restricted whitelist: features outside it must stay (almost) unchanged.
    # A tolerance of None means the feature must be exactly untouched.
    features_whitelist = [0, 1, 2]
    for kwargs, tol in ((dict(regularization="l1", optimizer="mp"), 1e-5),
                        (dict(regularization="l2", optimizer="mp"), 1e-3),
                        (dict(regularization="l1", C=0.1, optimizer="bfgs"), None),
                        (dict(regularization="l1", C=1.0, optimizer="nelder-mead"), None),
                        (dict(regularization=None, optimizer="bfgs"), None),
                        (dict(regularization=None, optimizer="nelder-mead"), None)):
        x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0,
                                                    features_whitelist=features_whitelist,
                                                    return_as_dict=False, **kwargs)
        assert y_cf == 0
        assert model.predict(np.array([x_cf])) == 0
        for i in range(x_orig.shape[0]):
            if i not in features_whitelist:
                if tol is None:
                    assert delta[i] == 0.
                else:
                    assert delta[i] <= tol

    # Binary problem: keep only classes 0 and 1.
    X, y = load_iris(return_X_y=True)
    keep = y != 2
    X, y = X[keep, :], y[keep]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    model = QuadraticDiscriminantAnalysis(store_covariance=True)
    model.fit(X_train, y_train)

    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 0

    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, y_target=1,
                                                features_whitelist=None, optimizer="mp",
                                                return_as_dict=False)
    assert y_cf == 1
    assert model.predict(np.array([x_cf])) == 1

    cf = generate_counterfactual(model, x_orig, y_target=1, features_whitelist=None,
                                 optimizer="mp", return_as_dict=True)
    assert cf["y_cf"] == 1
    assert model.predict(np.array([cf["x_cf"]])) == 1

    # Flip in the other direction (1 -> 0).
    x_orig = X_test[0, :]
    assert model.predict([x_orig]) == 1
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, y_target=0,
                                                features_whitelist=None, optimizer="mp",
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    # Error handling: wrong model type and a QDA fitted without covariances.
    from ceml.sklearn import QdaCounterfactual
    with pytest.raises(TypeError):
        QdaCounterfactual(sklearn.linear_model.LogisticRegression())

    model = QuadraticDiscriminantAnalysis()  # store_covariance=False -> no covariance_ attribute
    model.fit(X_train, y_train)
    with pytest.raises(AttributeError):
        QdaCounterfactual(model)
    with pytest.raises(AttributeError):
        generate_counterfactual(model, x_orig, 0)
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

from ceml.sklearn import generate_counterfactual


if __name__ == "__main__":
    # Load data.
    # FIX: `return_X_y` must be passed by keyword - the positional form was
    # deprecated in scikit-learn 0.23 and removed in 1.0 (it raised a
    # TypeError), and the rest of the project already uses the keyword form.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    # Whitelist of features - list of features we can change/use when computing a counterfactual
    features_whitelist = None  # We can use all features

    # Create and fit model
    model = DecisionTreeClassifier(max_depth=3)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x = X_test[1, :]
    print("Prediction on x: {0}".format(model.predict([x])))

    # Compute counterfactual
    print("\nCompute counterfactual ....")
    print(generate_counterfactual(model, x, y_target=0, features_whitelist=features_whitelist))
def test_lgmlvq_classwise():
    """Counterfactuals for a classwise LGMLVQ model on the iris data."""
    # Load data.
    # FIX: pass `return_X_y` by keyword - the positional form was deprecated
    # in scikit-learn 0.23 and removed in 1.0 (raises TypeError there).
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242)

    # Create and fit model
    model = LgmlvqModel(prototypes_per_class=3, classwise=True, max_iter=100, random_state=4242)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Compute counterfactual - all features may be changed
    features_whitelist = None

    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, features_whitelist=features_whitelist,
                                                regularization="l1", C=1.0, optimizer="bfgs",
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, features_whitelist=features_whitelist,
                                                regularization="l1", C=1.0, optimizer="nelder-mead",
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    # A whitelist covering all four iris features must still succeed, and
    # features outside the whitelist must stay exactly unchanged.
    features_whitelist = [0, 1, 2, 3]

    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, features_whitelist=features_whitelist,
                                                regularization="l1", C=1.0, optimizer="bfgs",
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    assert all([True if i in features_whitelist else delta[i] == 0. for i in range(x_orig.shape[0])])

    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, 0, features_whitelist=features_whitelist,
                                                regularization="l1", C=1.0, optimizer="nelder-mead",
                                                return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    assert all([True if i in features_whitelist else delta[i] == 0. for i in range(x_orig.shape[0])])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=4242) # Whitelist of features - list of features we can change/use when computing a counterfactual features_whitelist = [0, 1, 2, 3, 4] # Use the first five features only # Create and fit model model = Ridge() model.fit(X_train, y_train) # Select data point for explaining its prediction x = X_test[1, :] print("Prediction on x: {0}".format(model.predict([x]))) # Compute counterfactual print("\nCompute counterfactual ....") y_target = 25.0 done = lambda z: np.abs( y_target - z ) <= 0.5 # Since we might not be able to achieve `y_target` exactly, we tell ceml that we are happy if we do not deviate more than 0.5 from it. print( generate_counterfactual(model, x, y_target=y_target, features_whitelist=features_whitelist, C=1.0, regularization="l2", optimizer="bfgs", done=done))