def validate(DBSCAN_clusters):
    """Print Rand and Jaccard scores for a DBSCAN labelling vs. the ground truth.

    Reads the module-level ``geneIds`` and ``data_ground_truth``; nothing is
    returned, the two scores are only printed.

    Args:
        DBSCAN_clusters: cluster labels produced by DBSCAN, passed to
            ``get_cluster_group`` together with ``geneIds``
            (presumably one label per gene id -- confirm against the caller).
    """
    # Ground-truth grouping, ordered by key before the incidence matrix is built.
    truth_groups = get_cluster_group(geneIds, list(data_ground_truth))
    truth_groups = OrderedDict(sorted(truth_groups.items()))
    truth_incidence = get_incidence_matrix(data_ground_truth, truth_groups)

    # Same pipeline for the DBSCAN labels.
    dbscan_groups = get_cluster_group(geneIds, DBSCAN_clusters)
    dbscan_groups = OrderedDict(sorted(dbscan_groups.items()))
    dbscan_incidence = get_incidence_matrix(DBSCAN_clusters, dbscan_groups)

    # NOTE(review): `categories` is indexed as a 2x2 table below; its exact
    # layout is defined by get_categories -- confirm there.
    categories = get_categories(truth_incidence, dbscan_incidence)
    both_diagonal = categories[0][0] + categories[1][1]
    jaccard_denominator = categories[1][0] + categories[0][1] + categories[1][1]

    rand = both_diagonal / np.sum(categories)
    jaccard = categories[1][1] / jaccard_denominator

    print("RAND: ", rand)
    print("Jaccard: ", jaccard)
def coef(gmm_truth):
    """Print Rand and Jaccard scores for a GMM labelling vs. the ground truth.

    Reads the module-level ``data`` array: column 0 is used as the ids and
    column 1 as the ground-truth labels, both cast to ``int``. Nothing is
    returned, the two scores are only printed.

    Args:
        gmm_truth: cluster labels to evaluate, passed to ``get_cluster_group``
            together with the ids (presumably one label per row of ``data``
            -- confirm against the caller).
    """
    ids = np.array(data[:, 0], dtype='int')
    ground_truth = np.array(data[:, 1], dtype='int')

    # Ground-truth grouping, ordered by key before the incidence matrix is built.
    truth_groups = collections.OrderedDict(
        sorted(get_cluster_group(ids, ground_truth).items())
    )
    truth_incidence = get_incidence_matrix(ground_truth, truth_groups)

    # Same pipeline for the labels under evaluation.
    gmm_groups = collections.OrderedDict(
        sorted(get_cluster_group(ids, gmm_truth).items())
    )
    gmm_incidence = get_incidence_matrix(gmm_truth, gmm_groups)

    # NOTE(review): `categories` is indexed as a 2x2 table below; its exact
    # layout is defined by get_categories -- confirm there.
    categories = get_categories(truth_incidence, gmm_incidence)
    both_diagonal = categories[0][0] + categories[1][1]
    jaccard_denominator = categories[1][0] + categories[0][1] + categories[1][1]

    rand = both_diagonal / np.sum(categories)
    jaccard = categories[1][1] / jaccard_denominator

    print("Rand: ", rand)
    print("Jaccard: ", jaccard)
# Build L = D - W as an (n_data, n_data) matrix and eigen-decompose it.
L = np.reshape(D - W, (n_data, n_data))
eig_val, eig_vec = eigh(L)
reduced_data = getNbyKMatrix(eig_vec, eig_val)

# "hard" seeds k-means from the given centers; anything else uses random init.
init = (
    choose_initial_centroids_by_ids(centers, reduced_data)
    if choice == "hard"
    else "random"
)
clusters = KMeans(
    n_clusters=no_of_cluster,
    init=init,
    n_init=20,
    max_iter=max_iters,
).fit_predict(reduced_data)

# Column 0 of `data` is used as the ids, column 1 as the reference labels.
ids = data[:, 0]
global_truth = data[:, 1]
cluster_group = get_cluster_group(ids, clusters)
truth_group = get_cluster_group(ids, global_truth)

kmean_matrix = get_incidence_matrix(clusters, cluster_group)
truth_matrix = get_incidence_matrix(global_truth, truth_group)

# NOTE(review): `categories` is indexed as a 2x2 table below; its exact
# layout is defined by get_categories -- confirm there.
categories = get_categories(kmean_matrix, truth_matrix)
rand = (categories[0][0] + categories[1][1]) / np.sum(categories)
jaccard = categories[1][1] / (
    categories[1][0] + categories[0][1] + categories[1][1]
)
print("Rand Coeff for Spectral algorithm: ", rand)
print("Jaccard Coeff for Spectral algorithm: ", jaccard)

# Project `attr` to two components for plotting the cluster assignment.
data_pca = PCA(n_components=2).fit_transform(attr)
plot_pca(data_pca, clusters, filename)