Esempio n. 1
0
def validate(DBSCAN_clusters):
    """Print the Rand and Jaccard coefficients for a DBSCAN labelling.

    The labelling is compared against the module-level ``data_ground_truth``
    labels, with both sides keyed by the module-level ``geneIds``.

    Args:
        DBSCAN_clusters: cluster labels to evaluate.
            NOTE(review): presumably one label per entry of ``geneIds``, in
            the same order — confirm against the caller.

    Returns:
        None. Both coefficients are written to stdout.
    """
    # Reference side: group ids by ground-truth label, ordered by sorted key.
    truth_groups = OrderedDict(
        sorted(get_cluster_group(geneIds, list(data_ground_truth)).items())
    )
    truth_incidence = get_incidence_matrix(data_ground_truth, truth_groups)

    # Candidate side: the same construction for the DBSCAN labels.
    dbscan_groups = OrderedDict(
        sorted(get_cluster_group(geneIds, DBSCAN_clusters).items())
    )
    dbscan_incidence = get_incidence_matrix(DBSCAN_clusters, dbscan_groups)

    # 2x2 table returned by get_categories.
    # NOTE(review): the formulas below treat [0][0] and [1][1] as the
    # agreement cells and [0][1] / [1][0] as the disagreement cells — confirm
    # against get_categories.
    table = get_categories(truth_incidence, dbscan_incidence)
    agree_00 = table[0][0]
    agree_11 = table[1][1]
    disagree = table[1][0] + table[0][1]

    # Both values are computed before anything is printed.
    rand = (agree_00 + agree_11) / np.sum(table)
    jaccard = agree_11 / (disagree + agree_11)

    print("RAND: ", rand)
    print("Jaccard: ", jaccard)
def coef(gmm_truth):
    """Print the Rand and Jaccard coefficients for a candidate labelling.

    Column 0 of the module-level ``data`` array is used as the sample ids and
    column 1 as the ground-truth labels; both are cast to ``int``.

    Args:
        gmm_truth: cluster labels to evaluate.
            NOTE(review): presumably one label per row of ``data`` — confirm
            against the caller.

    Returns:
        None. Both coefficients are written to stdout.
    """
    sample_ids = np.array(data[:, 0], dtype='int')
    truth_labels = np.array(data[:, 1], dtype='int')

    # Reference side: group ids by ground-truth label, ordered by sorted key.
    truth_groups = collections.OrderedDict(
        sorted(get_cluster_group(sample_ids, truth_labels).items())
    )
    truth_incidence = get_incidence_matrix(truth_labels, truth_groups)

    # Candidate side: the same construction for the labels under test.
    candidate_groups = collections.OrderedDict(
        sorted(get_cluster_group(sample_ids, gmm_truth).items())
    )
    candidate_incidence = get_incidence_matrix(gmm_truth, candidate_groups)

    # 2x2 table returned by get_categories.
    # NOTE(review): the formulas below treat [0][0] and [1][1] as the
    # agreement cells and [0][1] / [1][0] as the disagreement cells — confirm
    # against get_categories.
    table = get_categories(truth_incidence, candidate_incidence)
    agree_00 = table[0][0]
    agree_11 = table[1][1]
    disagree = table[1][0] + table[0][1]

    # Both values are computed before anything is printed.
    rand = (agree_00 + agree_11) / np.sum(table)
    jaccard = agree_11 / (disagree + agree_11)

    print("Rand: ", rand)
    print("Jaccard: ", jaccard)
# Spectral-clustering script fragment: form L = D - W, eigen-decompose it,
# cluster the reduced representation with k-means, score the clustering against
# the ground truth, and plot a 2-D PCA projection.
# NOTE(review): D and W are defined outside this view; they look like the
# degree and affinity matrices of a graph Laplacian — confirm where they are
# built.
L = np.reshape((D - W), (n_data, n_data))

# NOTE(review): `eigh` is imported outside this view; presumably the
# symmetric/Hermitian eigen-solver from scipy.linalg or numpy.linalg — verify.
eig_val, eig_vec = eigh(L)

# getNbyKMatrix is defined elsewhere; presumably it picks k eigenvectors to
# form the reduced n-by-k embedding — TODO confirm.
reduced_data = getNbyKMatrix(eig_vec, eig_val)
# choice == "hard": seed k-means with centroids derived from `centers` and the
# reduced data (helper defined elsewhere). Any other value: random seeding.
if choice == "hard":
    init = choose_initial_centroids_by_ids(centers, reduced_data)
else:
    init = "random"
# fit_predict returns the cluster label assigned to each row of reduced_data.
clusters = KMeans(n_clusters=no_of_cluster,
                  init=init,
                  n_init=20,
                  max_iter=max_iters).fit_predict(reduced_data)
# Column 1 of `data` is used as the ground-truth labels, column 0 as the ids.
global_truth = data[:, 1]
ids = data[:, 0]
cluster_group = get_cluster_group(ids, clusters)
truth_group = get_cluster_group(ids, global_truth)
# pprint(cluster_group, indent=2)
kmean_matrix = get_incidence_matrix(clusters, cluster_group)
truth_matrix = get_incidence_matrix(global_truth, truth_group)
# 2x2 table comparing the two incidence matrices.
# NOTE(review): the formulas below treat [0][0] and [1][1] as the agreement
# cells and [0][1] / [1][0] as the disagreement cells — confirm against
# get_categories.
categories = get_categories(kmean_matrix, truth_matrix)

# Rand: agreement cells over the sum of all cells.
rand = (categories[0][0] + categories[1][1]) / np.sum(categories)
# Jaccard: the [1][1] cell over itself plus the two disagreement cells.
jaccard = categories[1][1] / (categories[1][0] + categories[0][1] +
                              categories[1][1])

print("Rand Coeff for Spectral algorithm: ", rand)
print("Jaccard Coeff for Spectral algorithm: ", jaccard)
# Project `attr` (defined outside this view) to two principal components and
# pass the projection, the cluster labels and `filename` to plot_pca.
data_pca = PCA(n_components=2).fit_transform(attr)
plot_pca(data_pca, clusters, filename)