def dbscanClusterValidation(groundTruthMap, geneList, clusterList, noiseList, file_name):
    """Validate DBSCAN output against the ground truth and plot the result.

    Args:
        groundTruthMap: dict mapping 1-based gene id -> ground-truth cluster label.
        geneList: list of gene expression vectors (one per gene).
        clusterList: list of clusters, each a list of 1-based gene ids.
        noiseList: gene ids DBSCAN classified as noise.
        file_name: input file name, used only in the plot title.

    Side effects: prints the Jaccard coefficient (inside
    ExternalIndex_Computation) and draws a PCA scatter plot of the result.
    """
    dbscanAlgoClusterMap = {}
    size = len(geneList)
    ExternalIndex_Computation.computeAlgoClusterMap(clusterList, dbscanAlgoClusterMap, noiseList)
    # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the documented replacement and is behaviorally identical here.
    groundTruthClusterMatrix = np.zeros((size, size), dtype=int)
    algoClusterMatrix = np.zeros((size, size), dtype=int)
    ExternalIndex_Computation.computeGroundTruthAndAlgoClusterMatrix(
        groundTruthMap, geneList, groundTruthClusterMatrix, dbscanAlgoClusterMap, algoClusterMatrix)
    ExternalIndex_Computation.computeJaccardCoefficient(
        groundTruthClusterMatrix, algoClusterMatrix, size)
    # Per-gene algorithm labels in gene-id order; the map is keyed 1..len(geneList).
    # (Removed dead code: an unused `new_list` of reordered genes and an unused
    # `no_of_clusters` derived from the module-level `labels` list — the pca call
    # below has always been given len(clusterList).)
    lab = [dbscanAlgoClusterMap[k] for k in range(1, len(geneList) + 1)]
    graph_label = "DBScan Result (" + file_name + ")"
    pca(geneList, lab, graph_label, len(clusterList))
def createGeneList(file, groundTruthMap, geneList, file_name):
    """Parse a tab-separated gene file and plot the original (ground-truth) clustering.

    Each row is expected as: <gene id> <ground-truth label> <float features...>.
    TODO(review): confirm column layout against the data files; inferred from the
    indexing here and in createInputMatrix.

    Args:
        file: an open file object (or any iterable of lines).
        groundTruthMap: out-param dict, filled with 1-based row index -> label.
        geneList: out-param list, appended with one float feature vector per row.
        file_name: input file name, used only in the plot title.

    Side effects: appends every label to the module-level `labels` list and
    draws a PCA plot of the ground-truth clustering.
    """
    k = 1
    for line in iter(file):
        record = line.strip().split("\t")
        groundTruthMap[k] = int(record[1])
        labels.append(int(record[1]))  # module-level ground-truth label list
        k += 1
        # Columns 2.. are the float expression values for this gene.
        geneList.append([float(value) for value in record[2:]])
    # BUG FIX: the original computed int(np.unique(int(record[1]))) — np.unique of
    # a single int (from the LAST row only), which always yields that one label
    # value rather than the number of clusters. The cluster count is the number
    # of distinct ground-truth labels seen.
    no_of_clusters = len(np.unique(labels))
    graph_label = "Hierarchical Original (" + file_name + ")"
    pca(geneList, labels, graph_label, no_of_clusters)
def createInputMatrix(file, groundTruthMap, file_name):
    """Parse a tab-separated gene file, plot the original clustering, return the matrix.

    Each row is expected as: <gene id> <ground-truth label> <float features...>.
    TODO(review): confirm column layout against the data files; inferred from
    the indexing below.

    Args:
        file: an open file object (or any iterable of lines).
        groundTruthMap: out-param dict, filled with 1-based row index -> label.
        file_name: input file name, used only in the plot title.

    Returns:
        list of per-gene float feature vectors.

    Side effects: appends every label to the module-level `labels` list and
    draws a PCA plot of the ground-truth clustering.
    """
    geneList = []
    k = 1
    for line in iter(file):
        record = line.strip().split("\t")
        groundTruthMap[k] = int(record[1])
        labels.append(int(record[1]))
        k += 1
        # Renamed from `list`, which shadowed the builtin.
        features = [float(record[i]) for i in range(2, len(record))]
        geneList.append(features)
    # BUG FIX: the original computed int(np.unique(int(record[1]))) — np.unique of
    # a single int from the LAST row only, i.e. that row's label value, not the
    # cluster count. Use the number of distinct ground-truth labels instead.
    no_of_clusters = len(np.unique(labels))
    graph_label = "DBScan Original (" + file_name + ")"
    pca(geneList, labels, graph_label, no_of_clusters)
    return geneList
def checkConvergence(final_centroid, list_final_clusters):
    """Check K-means convergence; recurse for another iteration or finalize.

    Compares the newly computed centroids with the previous iteration's
    centroids. If they differ and the iteration cap (`no_of_iterations`)
    has not been reached, another euclidean_distance pass is started with
    the new centroids; otherwise the result is plotted via PCA and handed
    to the external-index validation.

    Args:
        final_centroid: centroids computed in the current iteration.
        list_final_clusters: per-cluster lists of 0-based data-point indices.

    Side effects: mutates the module-level `old_centroid` and `count`,
    clears `final_centroid` in place when recursing, prints the final
    cluster list, draws a PCA plot, and calls kmeansClusterValidation.
    """
    global count
    converged = np.array_equal(final_centroid, old_centroid)
    # `answer == False` replaced with the idiomatic `not converged`;
    # removed an unused local `j = 1`.
    if not converged and no_of_iterations != count:
        count += 1
        # Carry the new centroids into the next iteration (mutate in place —
        # other code holds references to old_centroid).
        old_centroid.clear()
        old_centroid.extend(final_centroid)
        final_centroid.clear()
        euclidean_distance(brr, old_centroid)
    else:
        print("Cluster list: ", list_final_clusters)
        graph_label = "K-means: Result(" + file_name + ")"
        # Assign each data point the 1-based id of the cluster containing it.
        labels_algo = []
        for i in range(len(brr)):
            for j in range(len(list_final_clusters)):
                if i in list_final_clusters[j]:
                    labels_algo.append(j + 1)
        pca(brr, labels_algo, graph_label, no_of_clusters)
        # Validation expects 1-based gene ids, so shift every index by one.
        list_final_clusters1 = [[idx + 1 for idx in cluster]
                                for cluster in list_final_clusters]
        kmeansClusterValidation(groundtruth_map, brr, list_final_clusters1)
def hcaClusterValidation(groundTruthMap, geneList, hcaClusters, file_name):
    """Validate hierarchical clustering against the ground truth and plot the result.

    Args:
        groundTruthMap: dict mapping 1-based gene id -> ground-truth cluster label.
        geneList: list of gene expression vectors (one per gene).
        hcaClusters: list of clusters, each a list of 1-based gene ids.
        file_name: input file name, used only in the plot title.

    Side effects: prints the Jaccard coefficient (inside
    ExternalIndex_Computation) and draws a PCA scatter plot of the result.
    """
    hcaAlgoClusterMap = {}
    noiseList = []  # hierarchical clustering produces no noise points
    size = len(geneList)
    ExternalIndex_Computation.computeAlgoClusterMap(hcaClusters, hcaAlgoClusterMap, noiseList)
    # BUG FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the documented replacement and is behaviorally identical here.
    groundTruthClusterMatrix = np.zeros((size, size), dtype=int)
    algoClusterMatrix = np.zeros((size, size), dtype=int)
    ExternalIndex_Computation.computeGroundTruthAndAlgoClusterMatrix(
        groundTruthMap, geneList, groundTruthClusterMatrix, hcaAlgoClusterMap, algoClusterMatrix)
    ExternalIndex_Computation.computeJaccardCoefficient(
        groundTruthClusterMatrix, algoClusterMatrix, size)
    no_of_clusters = len(hcaClusters)
    graph_label = "Hierarchical Result (" + file_name + ")"
    # Reorder the genes cluster-by-cluster and build a matching label list so
    # the PCA plot colors each cluster consistently.
    lab = []
    new_list = []
    for i in range(len(hcaClusters)):
        for j in range(len(hcaClusters[i])):
            new_list.append(geneList[hcaClusters[i][j] - 1])
    for i in range(len(hcaClusters)):
        for j in range(len(hcaClusters[i])):
            lab.append(hcaAlgoClusterMap[hcaClusters[i][j]])
    pca(new_list, lab, graph_label, no_of_clusters)
# K-means setup: seed centroids, plot the original labeling, build the ground-truth
# map, then kick off the first distance iteration.
# NOTE(review): the `global unique_lab` statement implies this chunk runs inside a
# function whose `def` (and the definitions of `arr`, `brr`, `initial_clusters`,
# `graph_label`, `no_of_clusters`) lies outside this view — confirm before editing.

# Seed centroids: the rows picked by `initial_clusters`, with the id and
# ground-truth-label columns (0 and 1) stripped off.
old_centroid = arr[initial_clusters][:, 2:]
old_centroid = old_centroid.tolist()

# Get all the ground-truth labels (column 1 of the raw matrix).
labels = arr[:, 1]
labels = labels.astype(np.int64)

global unique_lab
unique_lab = np.unique(labels)  # All unique labels
unique_length = len(np.unique(labels))
# A -1 label marks outliers/noise in the ground truth; don't count it as a cluster.
for i in range(0, len(unique_lab)):
    if unique_lab[i] == -1:
        unique_length = len(unique_lab) - 1

# Calculate pca of original data set
pca(brr, labels, graph_label, unique_length)
unique_length = no_of_clusters  # from here on use the requested cluster count k
#random = np.random.randint(0,arr.shape[0]-1, unique_length)
# [378, 55, 51, 35, 237] [464,246,261,397,396,84,67,361,412,247,4], [505,364,145,456,60,41,472,366,460,449,4]

# Map 1-based gene id -> ground-truth cluster label for later validation.
groundtruth_map = {}
k = 1
for i in range(0, len(arr)):
    groundtruth_map[k] = int(arr[i][1])
    k += 1

# Calculate euclidean distance (first K-means iteration), timing the whole run.
start_time = time.time()
euclidean_distance(brr, old_centroid)
print("--- %s seconds ---" % (time.time() - start_time))