def do_experiment(title, cols, labels):
    """Run an NUM_CV-fold cross-validated GMLVQ classification experiment.

    Loads the requested feature columns, trains one GmlvqModel per fold,
    accumulates a row-normalized average confusion matrix and the per-fold
    feature-relevance profiles, and writes both figures to OUTPUT_DIR.

    Parameters
    ----------
    title : str
        Name used in console output and in the saved figure file names.
    cols : sequence
        Column specification forwarded to read_cols() to load the data.
    labels : array-like
        Class labels aligned with the rows returned by read_cols().

    Returns
    -------
    float
        Mean cross-validated accuracy over the NUM_CV folds.
        (Previously computed but discarded; returning it is backward
        compatible — callers that ignore the return value are unaffected.)
    """
    print(f'{title} experiment')
    data = read_cols(cols)
    avg_cm = np.zeros((num_classes, num_classes))
    relevances = np.empty((NUM_CV, data.shape[1]))
    scores = np.empty(NUM_CV)
    for i, (train, test) in enumerate(KFold(n_splits=NUM_CV).split(data)):
        gmlvq = GmlvqModel(prototypes_per_class=[1, 1, 1, 1])
        gmlvq.fit(data[train], labels[train])
        scores[i] = gmlvq.score(data[test], labels[test])
        # The diagonal of omega^T . omega gives per-feature relevance weights.
        rel_matrix = np.dot(np.transpose(gmlvq.omega_), gmlvq.omega_)
        relevances[i] = np.diag(rel_matrix)
        label_pred = gmlvq.predict(data[test])
        avg_cm += confusion_matrix(labels[test], label_pred)
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    # Row-normalize the summed confusion matrix so each row sums to 1.
    avg_cm = avg_cm.astype('float') / avg_cm.sum(axis=1)[:, np.newaxis]
    avg_acc = np.mean(scores)
    print(f'mean score: {np.mean(scores)} - variance score: {np.var(scores)}')
    # save confusion matrix figure
    plot_confusion_matrix(title, avg_cm)
    plt.savefig(f'{OUTPUT_DIR}/CM_{title}.pdf')
    # BUG FIX: was `plt.clf` — the method object was referenced but never
    # called, so the figure was never cleared before the next plot.
    plt.clf()
    plot_relevances(title, cols, relevances)
    plt.savefig(f'{OUTPUT_DIR}/REL_{title}.pdf')
    plt.clf()
    return avg_acc
def _check_counterfactual(model, x_orig, y_target, features_whitelist,
                          regularization, optimizer, C=None):
    """Generate one counterfactual for `x_orig` and assert it is valid.

    Asserts the requested target label is reached both by the returned
    label and by re-predicting the counterfactual, and — when a feature
    whitelist is given — that no feature outside the whitelist changed.
    `C` is only forwarded when explicitly provided, so the library default
    applies otherwise (matches the original call pattern).
    """
    kwargs = dict(features_whitelist=features_whitelist,
                  regularization=regularization,
                  optimizer=optimizer,
                  return_as_dict=False)
    if C is not None:
        kwargs["C"] = C
    x_cf, y_cf, delta = generate_counterfactual(model, x_orig, y_target, **kwargs)
    assert y_cf == y_target
    assert model.predict(np.array([x_cf])) == y_target
    if features_whitelist is not None:
        # Features outside the whitelist must be left untouched.
        assert all(True if i in features_whitelist else delta[i] == 0.
                   for i in range(x_orig.shape[0]))


def test_gmlvq():
    """End-to-end test: fit GMLVQ on iris and generate counterfactuals
    with various optimizers, regularizations, and feature whitelists."""
    # Load data.
    # BUG FIX: load_iris(True) used the positional form of `return_X_y`,
    # deprecated in scikit-learn 0.23 and removed in 1.1.
    X, y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=4242)

    # Create and fit model
    model = GmlvqModel(prototypes_per_class=3, max_iter=200,
                       random_state=4242, dim=2)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # No whitelist: every feature may change.
    _check_counterfactual(model, x_orig, 0, None, "l1", "bfgs", C=0.01)
    _check_counterfactual(model, x_orig, 0, None, "l1", "nelder-mead", C=1.0)
    _check_counterfactual(model, x_orig, 0, None, None, "bfgs")
    _check_counterfactual(model, x_orig, 0, None, None, "nelder-mead")

    # Full whitelist: behaves like no restriction but exercises the
    # delta-zero check.
    _check_counterfactual(model, x_orig, 0, [0, 1, 2, 3], "l1", "bfgs", C=0.01)
    _check_counterfactual(model, x_orig, 0, [0, 1, 2, 3], "l1", "nelder-mead",
                          C=1.0)

    # Restricted whitelist: only features 0 and 2 may change.
    _check_counterfactual(model, x_orig, 0, [0, 2], None, "nelder-mead")
(X, np.hstack((np.random.uniform(7, 12, n_samples).reshape(-1, 1), np.array([5 for _ in range(n_samples)]).reshape(-1, 1))))) y += [1 for _ in range(n_samples)] y = np.array(y) from plotting import plot_classification_dataset, export_as_png plot_classification_dataset(X, y, show=False) export_as_png("toydata.png") # Fit model model = GmlvqModel(prototypes_per_class=1, random_state=4242) model.fit(X, y) # Evaluate y_pred = model.predict(X) y_, y_pred_ = encode_labels(y.reshape(-1, 1), y_pred.reshape(-1, 1)) print("ROC-AUC: {0}".format(roc_auc_score(y_, y_pred_, average="weighted"))) print("Omega\n{0}".format(np.dot(model.omega_.T, model.omega_))) print() # Compute counterfactual metric x_orig = np.array([10.0, 0]) y_target = 1 Omega_cf = compute_change_in_distmat_gmlvq(model, x_orig, y_target)[0] print("Omega_cf\n{0}".format(Omega_cf)) plot_distmat(np.abs(np.dot(model.omega_.T, model.omega_)), show=False) export_as_png("omega.png")
# GRLVQ grlvq = GrlvqModel() grlvq.fit(x, y) p3 = plt.subplot(233) p3.set_title('GRLVQ') plot(grlvq.project(x, 2), y, grlvq.predict(x), grlvq.project(grlvq.w_, 2), grlvq.c_w_, p3) # GMLVQ gmlvq = GmlvqModel() gmlvq.fit(x, y) p4 = plt.subplot(234) p4.set_title('GMLVQ') plot(gmlvq.project(x, 2), y, gmlvq.predict(x), gmlvq.project(gmlvq.w_, 2), gmlvq.c_w_, p4) # LGMLVQ lgmlvq = LgmlvqModel() lgmlvq.fit(x, y) p5 = plt.subplot(235) elem_set = list(set(lgmlvq.c_w_)) p5.set_title('LGMLVQ 1') plot(lgmlvq.project(x, 1, 2, True), y, lgmlvq.predict(x), lgmlvq.project(np.array([lgmlvq.w_[1]]), 1, 2), elem_set.index(lgmlvq.c_w_[1]), p5) p6 = plt.subplot(236) p6.set_title('LGMLVQ 2') plot(lgmlvq.project(x, 6, 2, True), y, lgmlvq.predict(x), lgmlvq.project(np.array([lgmlvq.w_[6]]), 6, 2),
def get_error(lvq_model: GmlvqModel, x, y) -> float:
    """Return the misclassification rate of *lvq_model* on the data (x, y).

    The error is one minus the fraction of samples whose predicted label
    matches the ground-truth label.
    """
    predicted = lvq_model.predict(x)
    accuracy = np.mean(predicted == y)
    return 1 - accuracy