def do_experiment(title, cols, labels):
    ''' perform the classification experiment with given data '''
    print(f'{title} experiment')
    data = read_cols(cols)

    avg_acc = 0
    avg_cm = np.zeros((num_classes, num_classes))
    relevances = np.empty((NUM_CV, data.shape[1]))
    scores = np.empty(NUM_CV)

    for i, (train, test) in enumerate(KFold(n_splits=NUM_CV).split(data)):
        gmlvq = GmlvqModel(prototypes_per_class=[1, 1, 1, 1])
        gmlvq.fit(data[train], labels[train])

        score = gmlvq.score(data[test], labels[test])
        scores[i] = score

        # relevance matrix Lambda = Omega^T Omega; its diagonal holds per-feature relevances
        rel_matrix = np.dot(np.transpose(gmlvq.omega_), gmlvq.omega_)

        relevances[i] = np.diag(rel_matrix)

        label_pred = gmlvq.predict(data[test])
        avg_cm += confusion_matrix(labels[test], label_pred)

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # normalize confusion matrix
    avg_cm = avg_cm.astype('float') / avg_cm.sum(axis=1)[:, np.newaxis]
    avg_acc = np.mean(scores)

    print(f'mean score: {avg_acc} - variance score: {np.var(scores)}')

    # save confusion matrix figure
    plot_confusion_matrix(title, avg_cm)
    plt.savefig(f'{OUTPUT_DIR}/CM_{title}.pdf')
    plt.clf()

    plot_relevances(title, cols, relevances)
    plt.savefig(f'{OUTPUT_DIR}/REL_{title}.pdf')
    plt.clf()
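
# A self-contained sketch of the cross-validation pattern used above, run on the
# iris data so it needs none of the project-specific helpers (read_cols,
# plot_confusion_matrix, plot_relevances). The sklearn_lvq import path and the
# fold count of 5 are assumptions, not taken from the original snippet.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn_lvq import GmlvqModel

X_iris, y_iris = load_iris(return_X_y=True)
fold_scores = []
for train_idx, test_idx in KFold(n_splits=5, shuffle=True, random_state=0).split(X_iris):
    # one prototype per class; iris has three classes
    clf = GmlvqModel(prototypes_per_class=[1, 1, 1])
    clf.fit(X_iris[train_idx], y_iris[train_idx])
    fold_scores.append(clf.score(X_iris[test_idx], y_iris[test_idx]))
print(f'mean CV score: {np.mean(fold_scores)}')
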
Example #2

def test_gmlvq():
    # Load data
    X, y = load_iris(return_X_y=True)

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=4242)

    # Create and fit model
    model = GmlvqModel(prototypes_per_class=3,
                       max_iter=200,
                       random_state=4242,
                       dim=2)
    model.fit(X_train, y_train)

    # Select data point for explaining its prediction
    x_orig = X_test[1:4][0, :]
    assert model.predict([x_orig]) == 2

    # Compute counterfactual
    features_whitelist = None

    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization="l1",
        C=0.01,
        optimizer="bfgs",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization="l1",
        C=1.0,
        optimizer="nelder-mead",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization=None,
        optimizer="bfgs",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization=None,
        optimizer="nelder-mead",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0

    features_whitelist = [0, 1, 2, 3]
    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization="l1",
        C=0.01,
        optimizer="bfgs",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    assert all(i in features_whitelist or delta[i] == 0.
               for i in range(x_orig.shape[0]))

    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization="l1",
        C=1.0,
        optimizer="nelder-mead",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    assert all(i in features_whitelist or delta[i] == 0.
               for i in range(x_orig.shape[0]))

    features_whitelist = [0, 2]
    x_cf, y_cf, delta = generate_counterfactual(
        model,
        x_orig,
        0,
        features_whitelist=features_whitelist,
        regularization=None,
        optimizer="nelder-mead",
        return_as_dict=False)
    assert y_cf == 0
    assert model.predict(np.array([x_cf])) == 0
    assert all(i in features_whitelist or delta[i] == 0.
               for i in range(x_orig.shape[0]))
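
    # The whitelist assertion above is repeated for every optimizer/regularization
    # combination. A small helper (illustrative only, not part of the original test)
    # expresses the same condition: every feature outside the whitelist must be
    # left unchanged by the counterfactual.
    def delta_respects_whitelist(delta, whitelist, n_features):
        # features not in the whitelist must have zero change
        return all(i in whitelist or delta[i] == 0. for i in range(n_features))

    assert delta_respects_whitelist(delta, features_whitelist, x_orig.shape[0])
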
    # Append a second cluster of samples: first feature uniform in [7, 12),
    # second feature fixed at 5. (The opening of this statement is missing in the
    # source; np.vstack is assumed as the enclosing call.)
    X = np.vstack(
        (X,
         np.hstack((np.random.uniform(7, 12, n_samples).reshape(-1, 1),
                    np.array([5 for _ in range(n_samples)]).reshape(-1, 1)))))
    y += [1 for _ in range(n_samples)]
    y = np.array(y)

    from plotting import plot_classification_dataset, export_as_png
    plot_classification_dataset(X, y, show=False)
    export_as_png("toydata.png")

    # Fit model
    model = GmlvqModel(prototypes_per_class=1, random_state=4242)
    model.fit(X, y)

    # Evaluate
    y_pred = model.predict(X)
    y_, y_pred_ = encode_labels(y.reshape(-1, 1), y_pred.reshape(-1, 1))
    print("ROC-AUC: {0}".format(roc_auc_score(y_, y_pred_,
                                              average="weighted")))

    print("Omega\n{0}".format(np.dot(model.omega_.T, model.omega_)))
    print()

    # Compute counterfactual metric
    x_orig = np.array([10.0, 0])
    y_target = 1
    Omega_cf = compute_change_in_distmat_gmlvq(model, x_orig, y_target)[0]
    print("Omega_cf\n{0}".format(Omega_cf))

    plot_distmat(np.abs(np.dot(model.omega_.T, model.omega_)), show=False)
    export_as_png("omega.png")
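
    # The diagonal of the relevance matrix Lambda = Omega^T Omega is the usual way
    # to read per-feature relevances off a fitted GMLVQ model (the first example
    # above does the same inside its cross-validation loop). Minimal sketch,
    # reusing the fitted `model` from this example:
    Lambda = np.dot(model.omega_.T, model.omega_)
    print("Feature relevances (diag of Lambda): {0}".format(np.diag(Lambda)))
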
# GRLVQ
grlvq = GrlvqModel()
grlvq.fit(x, y)
p3 = plt.subplot(233)
p3.set_title('GRLVQ')
plot(grlvq.project(x, 2),
     y, grlvq.predict(x), grlvq.project(grlvq.w_, 2),
     grlvq.c_w_, p3)

# GMLVQ
gmlvq = GmlvqModel()
gmlvq.fit(x, y)
p4 = plt.subplot(234)
p4.set_title('GMLVQ')
plot(gmlvq.project(x, 2),
     y, gmlvq.predict(x), gmlvq.project(gmlvq.w_, 2),
     gmlvq.c_w_, p4)

# LGMLVQ
lgmlvq = LgmlvqModel()
lgmlvq.fit(x, y)
p5 = plt.subplot(235)
elem_set = list(set(lgmlvq.c_w_))
p5.set_title('LGMLVQ 1')
plot(lgmlvq.project(x, 1, 2, True),
     y, lgmlvq.predict(x), lgmlvq.project(np.array([lgmlvq.w_[1]]), 1, 2),
     elem_set.index(lgmlvq.c_w_[1]), p5)
p6 = plt.subplot(236)
p6.set_title('LGMLVQ 2')
plot(lgmlvq.project(x, 6, 2, True),
     y, lgmlvq.predict(x), lgmlvq.project(np.array([lgmlvq.w_[6]]), 6, 2),
     elem_set.index(lgmlvq.c_w_[6]), p6)
Example #5

def get_error(lvq_model: GmlvqModel, x, y) -> float:
    """Return the misclassification rate of lvq_model on (x, y)."""
    y_ = lvq_model.predict(x)
    return 1 - np.mean(y_ == y)
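
# Usage sketch for get_error (illustrative; assumes the sklearn-lvq package and
# reuses the iris train/test split pattern from the test example above):
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn_lvq import GmlvqModel

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                    random_state=4242)
model = GmlvqModel(prototypes_per_class=1, random_state=4242)
model.fit(X_train, y_train)
print(f'test error: {get_error(model, X_test, y_test)}')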