def dbscanClusterValidation(groundTruthMap, geneList, clusterList, noiseList,
                            file_name):
    """Score a DBSCAN result against the ground truth and plot it.

    The clusters and noise points are passed to
    ``ExternalIndex_Computation.computeAlgoClusterMap`` together with an
    empty dict, which is afterwards read with the keys ``1..len(geneList)``.
    Two ``size x size`` integer matrices are then handed to
    ``computeGroundTruthAndAlgoClusterMatrix`` and
    ``computeJaccardCoefficient``, and finally the per-gene labels are
    plotted through ``pca``.

    Args:
        groundTruthMap: Ground-truth mapping, forwarded unchanged to
            ``computeGroundTruthAndAlgoClusterMatrix``.
        geneList: Sequence of data points; its length fixes the matrix size
            and the range of gene ids looked up in the cluster map.
        clusterList: Sequence of clusters produced by DBSCAN; its length is
            passed to ``pca`` as the number of clusters.
        noiseList: Noise points, forwarded to ``computeAlgoClusterMap``.
        file_name: Name of the data set, used only in the plot title.

    Raises:
        KeyError: If the cluster map has no entry for some gene id in
            ``1..len(geneList)``.
    """
    dbscanAlgoClusterMap = {}
    size = len(geneList)
    ExternalIndex_Computation.computeAlgoClusterMap(clusterList,
                                                    dbscanAlgoClusterMap,
                                                    noiseList)
    # ``np.int`` was only an alias of the builtin ``int`` and no longer
    # exists in NumPy >= 1.24; the builtin gives the same dtype.
    groundTruthClusterMatrix = np.zeros((size, size), dtype=int)
    algoClusterMatrix = np.zeros((size, size), dtype=int)
    ExternalIndex_Computation.computeGroundTruthAndAlgoClusterMatrix(
        groundTruthMap, geneList, groundTruthClusterMatrix,
        dbscanAlgoClusterMap, algoClusterMatrix)
    ExternalIndex_Computation.computeJaccardCoefficient(
        groundTruthClusterMatrix, algoClusterMatrix, size)

    # Gene ids are 1-based, so the labels are collected in geneList order.
    lab = [dbscanAlgoClusterMap[gene_id] for gene_id in range(1, size + 1)]

    graph_label = "DBScan Result (" + file_name + ")"
    pca(geneList, lab, graph_label, len(clusterList))
def createGeneList(file, groundTruthMap, geneList, file_name):
    """Parse a tab-separated data file and plot the ground-truth clustering.

    Every non-blank line is split on tabs. Column 1 is read as the integer
    ground-truth label and columns 2 onward as float attributes; column 0 is
    not used. Gene ids are assigned in reading order, starting at 1.

    Args:
        file: Iterable of text lines (for example an open file object).
        groundTruthMap: Dict filled in place with ``gene id -> label``.
        geneList: List extended in place with one list of floats per line.
        file_name: Name of the data set, used only in the plot title.

    Side effects:
        Appends every label to the module-level ``labels`` list (defined
        outside this function) and calls ``pca`` with the number of distinct
        labels read during this call.

    Raises:
        ValueError: If a label or attribute cannot be converted to a number.
        IndexError: If a non-blank line has fewer than two columns.
    """
    seen_labels = set()
    gene_id = 1
    for line in file:
        stripped = line.strip()
        if not stripped:
            # Blank lines (e.g. a trailing newline) carry no record.
            continue
        record = stripped.split("\t")
        label = int(record[1])
        groundTruthMap[gene_id] = label
        labels.append(label)
        seen_labels.add(label)
        gene_id += 1
        geneList.append([float(value) for value in record[2:]])
    # Count distinct labels; the previous code took np.unique of the last
    # row's label only, which produced that label's value instead of a count.
    no_of_clusters = len(seen_labels)
    graph_label = "Hierarchical Original (" + file_name + ")"
    pca(geneList, labels, graph_label, no_of_clusters)
def createInputMatrix(file, groundTruthMap, file_name):
    """Parse a tab-separated data file, plot it and return the attributes.

    Every non-blank line is split on tabs. Column 1 is read as the integer
    ground-truth label and columns 2 onward as float attributes; column 0 is
    not used. Gene ids are assigned in reading order, starting at 1.

    Args:
        file: Iterable of text lines (for example an open file object).
            It is not closed here.
        groundTruthMap: Dict filled in place with ``gene id -> label``.
        file_name: Name of the data set, used only in the plot title.

    Returns:
        A list with one list of floats per parsed line.

    Side effects:
        Appends every label to the module-level ``labels`` list (defined
        outside this function) and calls ``pca`` with the number of distinct
        labels read during this call.

    Raises:
        ValueError: If a label or attribute cannot be converted to a number.
        IndexError: If a non-blank line has fewer than two columns.
    """
    geneList = []
    seen_labels = set()
    gene_id = 1
    for line in file:
        stripped = line.strip()
        if not stripped:
            # Blank lines (e.g. a trailing newline) carry no record.
            continue
        record = stripped.split("\t")
        label = int(record[1])
        groundTruthMap[gene_id] = label
        labels.append(label)
        seen_labels.add(label)
        gene_id += 1
        geneList.append([float(value) for value in record[2:]])
    # Count distinct labels; the previous code took np.unique of the last
    # row's label only, which produced that label's value instead of a count.
    no_of_clusters = len(seen_labels)
    graph_label = "DBScan Original (" + file_name + ")"
    pca(geneList, labels, graph_label, no_of_clusters)
    return geneList
# Example #4
# 0
def checkConvergence(final_centroid, list_final_clusters):
    """
    Compare the freshly computed centroids with the previous ones and either
    start another iteration or report the final clustering.
    .........................................................................................
        Args:
            {Input}: final_centroid, list of newly computed centroids; it is emptied in place
            when another iteration is started
            {Input}: list_final_clusters, list of clusters, each holding 0-based point indices
        Returns:
            {Output}: None. When the centroids equal the module-level old_centroid, or the
            iteration counter has reached no_of_iterations, the clusters are printed, plotted
            with pca and passed (as 1-based ids) to kmeansClusterValidation. Otherwise the
            counter is incremented, old_centroid takes over the new centroids and
            euclidean_distance is called again.
    """
    global count

    converged = np.array_equal(final_centroid, old_centroid)
    if converged or no_of_iterations == count:
        print("Cluster list: ", list_final_clusters)
        graph_label = "K-means: Result("+ file_name+")"

        # One 1-based cluster number per (point, cluster) membership, in point order.
        labels_algo = [
            cluster_no
            for point_idx in range(len(brr))
            for cluster_no, members in enumerate(list_final_clusters, start=1)
            if point_idx in members
        ]
        pca(brr, labels_algo, graph_label, no_of_clusters)

        # The validation step works with 1-based ids, so shift every index by one.
        list_final_clusters1 = [
            [point_idx + 1 for point_idx in members]
            for members in list_final_clusters
        ]
        kmeansClusterValidation(groundtruth_map, brr, list_final_clusters1)
        return

    # Not converged yet: remember the new centroids and run another pass.
    count += 1
    old_centroid.clear()
    old_centroid.extend(final_centroid)
    final_centroid.clear()
    euclidean_distance(brr, old_centroid)
def hcaClusterValidation(groundTruthMap, geneList, hcaClusters, file_name):
    """Score a hierarchical clustering against the ground truth and plot it.

    The clusters are passed to
    ``ExternalIndex_Computation.computeAlgoClusterMap`` together with an
    empty dict and an empty noise list; the dict is afterwards read with the
    gene ids found in ``hcaClusters``. Two ``size x size`` integer matrices
    are then handed to ``computeGroundTruthAndAlgoClusterMatrix`` and
    ``computeJaccardCoefficient``, and the clustered points are plotted
    through ``pca``.

    Args:
        groundTruthMap: Ground-truth mapping, forwarded unchanged to
            ``computeGroundTruthAndAlgoClusterMatrix``.
        geneList: Sequence of data points, addressed by 1-based gene id.
        hcaClusters: Sequence of clusters, each a sequence of 1-based gene
            ids; its length is passed to ``pca`` as the number of clusters.
        file_name: Name of the data set, used only in the plot title.

    Raises:
        KeyError: If the cluster map has no entry for a gene id in
            ``hcaClusters``.
        IndexError: If a gene id does not address an element of ``geneList``.
    """
    hcaAlgoClusterMap = {}
    noiseList = []
    size = len(geneList)
    ExternalIndex_Computation.computeAlgoClusterMap(hcaClusters, hcaAlgoClusterMap, noiseList)
    # ``np.int`` was only an alias of the builtin ``int`` and no longer
    # exists in NumPy >= 1.24; the builtin gives the same dtype.
    groundTruthClusterMatrix = np.zeros((size, size), dtype=int)
    algoClusterMatrix = np.zeros((size, size), dtype=int)
    ExternalIndex_Computation.computeGroundTruthAndAlgoClusterMatrix(
        groundTruthMap, geneList, groundTruthClusterMatrix,
        hcaAlgoClusterMap, algoClusterMatrix)
    ExternalIndex_Computation.computeJaccardCoefficient(
        groundTruthClusterMatrix, algoClusterMatrix, size)

    no_of_clusters = len(hcaClusters)
    graph_label = "Hierarchical Result (" + file_name + ")"

    # Points and labels are listed cluster by cluster so that both sequences
    # passed to pca line up element for element.
    member_ids = [gene_id for cluster in hcaClusters for gene_id in cluster]
    new_list = [geneList[gene_id - 1] for gene_id in member_ids]
    lab = [hcaAlgoClusterMap[gene_id] for gene_id in member_ids]

    pca(new_list, lab, graph_label, no_of_clusters)
# Example #6
# 0
# Driver fragment: prepares the initial centroids and ground-truth data, plots
# the original data set and starts the distance computation.
# NOTE(review): `arr`, `brr`, `initial_clusters`, `graph_label`,
# `no_of_clusters`, `pca` and `euclidean_distance` are defined outside this
# fragment.

# Initial centroids: the rows of `arr` selected by `initial_clusters`, keeping
# columns 2 onward, converted to a plain list.
old_centroid = arr[initial_clusters][:,2:]
old_centroid = old_centroid.tolist()

# Ground-truth labels are taken from column 1 of `arr` and cast to int64.
labels = arr[:,1]
labels = labels.astype(np.int64)
# NOTE(review): if this runs at module level, the `global` statement below has
# no effect.
global unique_lab
unique_lab = np.unique(labels) # All unique labels

# Number of distinct labels, reduced by one when -1 is among them
# (presumably -1 marks outliers/noise -- TODO confirm).
unique_length = len(np.unique(labels))
for i in range(0,len(unique_lab)):
    if unique_lab[i] == -1:
        unique_length = len(unique_lab) -1

# Calculate pca of original data set
pca(brr, labels, graph_label, unique_length)
# After plotting, the label-derived count is replaced by `no_of_clusters`;
# `unique_length` is not read again in this fragment.
unique_length = no_of_clusters
#random = np.random.randint(0,arr.shape[0]-1, unique_length) # [378, 55, 51, 35, 237] [464,246,261,397,396,84,67,361,412,247,4], [505,364,145,456,60,41,472,366,460,449,4]

# Map 1-based row number -> integer ground-truth label (column 1 of `arr`).
groundtruth_map = {}
k = 1
for i in range(0,len(arr)):
    groundtruth_map[k] = int(arr[i][1])
    k += 1

# Calculate euclidean distance
# The call is timed with wall-clock time and the elapsed seconds are printed.
start_time = time.time()
euclidean_distance(brr, old_centroid)
print("--- %s seconds ---" % (time.time() - start_time))