Esempio n. 1
0
def evaluation_scores(groundtruth, labels_pred):
    """
    Score a predicted clustering against its groundtruth assignment.

    Every score is obtained from the ``metrics`` module, which is called
    with ``(groundtruth, labels_pred)`` in that argument order.

    :param groundtruth: (type list) the groundtruth (GT) cluster assignment; each element is an item's GT cluster_id.
    :param labels_pred: (type list) the predicted cluster assignment; each element is an item's predicted cluster_id.
    :return: the 7-tuple ``(NMI, A, F1, P, RI, ARI, map_pairs)`` holding, in
        order, the results of ``normalized_mutual_info_score``, ``accuracy``,
        ``f_measure``, ``purity``, ``random_index``, ``adjusted_rand_score``
        and ``get_map_pairs``.
    """
    # NOTE(review): `metrics` is not defined in this block; it is presumably a
    # project-local module (it exposes helpers such as `purity`) -- confirm.
    assignments = (groundtruth, labels_pred)

    nmi = metrics.normalized_mutual_info_score(*assignments)
    accuracy = metrics.accuracy(*assignments)
    f_measure = metrics.f_measure(*assignments)
    purity = metrics.purity(*assignments)
    rand_index = metrics.random_index(*assignments)
    adjusted_rand = metrics.adjusted_rand_score(*assignments)
    # Presumably the mapping between predicted and GT cluster ids -- verify
    # against the `metrics.get_map_pairs` implementation.
    map_pairs = metrics.get_map_pairs(*assignments)

    return nmi, accuracy, f_measure, purity, rand_index, adjusted_rand, map_pairs
Esempio n. 2
0
# casestudy_iris_pca.py

import data_iris
import hierarchical
import matplotlib.pyplot as plt
import metrics
import numpy as np
import pca
import plot_data

# (1) Load the iris samples together with their class labels.
iris = data_iris.iris()
X, class_label = iris.load()

# Fit PCA on the samples, keep two dimensions, and plot the projection
# coloured by class label.
model_pca = pca.pca()
model_pca.fit(X)
R = model_pca.data_reduced_dimension(reduced_dim=2)
plot_data.plot_scatter_class(
    R,
    class_label,
    "Iris Data Projected to 2 Dimensions using PCA",
    "u0",
    "u1",
)

# (2) Create the hierarchical clustering model.
model = hierarchical.hierarchical()

# (3) Fit the model on the 2-d projection and report the recorded fit time.
model.fit(R)
print("Time fit: {}".format(model.time_fit))

# (4) Evaluate and plot one saved clustering.
# NOTE(review): -3 indexes `clustersave` from its end; presumably this selects
# the third-from-last level of the hierarchy -- confirm against `hierarchical`.
level = -3
cluster_assignment = model.clustersave[level]

purity_score = metrics.purity(cluster_assignment, class_label)
print("Purity: {}".format(purity_score))

davies_bouldin_score = metrics.davies_bouldin(R, cluster_assignment)
print("Davies-Bouldin: {}".format(davies_bouldin_score))

silhouette_score = metrics.silhouette(R, cluster_assignment)
print("Silhouette: {}".format(silhouette_score))

model.plot_cluster(
    nlevel=level,
    title="Hierarchical Clustering for Iris Dataset reduced to 2d",
    xlabel="u0",
    ylabel="u1",
)
metrics.plot_cluster_distribution(cluster_assignment, class_label)
plt.show()
Esempio n. 3
0
        start = time.time()
        print("# Tuning hyper-parameters for", score, "\n")
        clf = GridSearchCV(LinearSVC(C=1, max_iter=1000),
                           param_grid,
                           cv=5,
                           scoring='%s' % score)
        clf.fit(x_train, y_train)
        print("Best parameters set found on development set:\n")
        print(clf.best_params_)
        print("Best value for ", score, ":\n")
        print(clf.best_score_)
        Y_true, Y_pred = y_test, clf.predict(x_test)

        print("Report")
        print(classification_report(Y_true, Y_pred, digits=6))
        print(metrics.purity(Y_true, Y_pred))
        print(metrics.entropy(Y_true, Y_pred))
        print("Accuracy: ", clf.score(x_test, y_test))
        print("Time taken:", time.time() - start, "\n")
    endtime = time.time()
    print("svc time cost: ", time.time() - start)
    print("Total time taken: ", endtime - start, "seconds.")
    print("********************************************************")

    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib.ticker import NullFormatter

    from sklearn import manifold, datasets
    import numpy as np
    import matplotlib