def clust(self, clusters, cluster_id):
    """Merge the two closest clusters in ``clusters`` into one new node.

    Every pair of clusters is compared via ``clusters[i] - clusters[j]`` and
    the result is reduced to one distance according to ``self.link``:
    ``"S"`` takes the minimum, ``"C"`` the maximum and ``"A"`` the mean.
    The pair with the smallest distance becomes the two children of a new
    ``cluster_node`` whose height is that distance.

    NOTE(review): the subtraction is presumed to return a non-empty, sized
    collection of pairwise distances -- confirm against ``cluster_node``.
    NOTE(review): SOURCE holds a second definition of ``clust`` right after
    this one; if both share a scope, the later one takes effect.

    Args:
        clusters: list of cluster nodes. Modified in place: the merged pair
            is removed and the new node is appended at the end.
        cluster_id: identifier handed to the new ``cluster_node`` as ``id``.

    Raises:
        ValueError: if ``self.link`` is not ``"S"``, ``"C"`` or ``"A"``, or
            if ``clusters`` holds fewer than two nodes.
    """
    if self.link == "S":
        linkage = min
    elif self.link == "C":
        linkage = max
    elif self.link == "A":
        def linkage(distances):
            return float(sum(distances)) / len(distances)
    else:
        raise ValueError("unknown linkage %r (expected 'S', 'C' or 'A')"
                         % (self.link,))

    if len(clusters) < 2:
        raise ValueError("need at least two clusters to merge")

    # No numeric sentinel: the first pair examined always becomes the
    # initial candidate, so arbitrarily large distances are still handled.
    min_val = None
    imin = jmin = -1
    for i in range(1, len(clusters)):
        # Walk j downwards so ties resolve to the same pair as before.
        for j in range(i - 1, -1, -1):
            d = linkage(clusters[i] - clusters[j])
            if min_val is None or d < min_val:
                min_val, imin, jmin = d, i, j

    new_cluster = cluster_node(id=cluster_id)
    new_cluster.add_children(clusters[imin])
    new_cluster.add_children(clusters[jmin])
    new_cluster.set_height(min_val)

    # Delete by position rather than by value so that nodes comparing equal
    # can never be confused. imin > jmin always holds, so removing the
    # larger index first keeps jmin valid.
    del clusters[imin]
    del clusters[jmin]
    clusters.append(new_cluster)
def clust(self, clusters, cluster_id):
    """Merge the two closest clusters in ``clusters`` into one new node.

    Every pair of clusters is compared via ``clusters[i] - clusters[j]`` and
    the result is reduced to one distance according to ``self.link``:
    ``"S"`` takes the minimum, ``"C"`` the maximum and ``"A"`` the mean.
    The pair with the smallest distance becomes the two children of a new
    ``cluster_node`` whose height is that distance.

    NOTE(review): the subtraction is presumed to return a non-empty, sized
    collection of pairwise distances -- confirm against ``cluster_node``.
    NOTE(review): SOURCE holds another definition of ``clust`` right before
    this one; if both share a scope, this later one is the effective one.

    Args:
        clusters: list of cluster nodes. Modified in place: the merged pair
            is removed and the new node is appended at the end.
        cluster_id: identifier handed to the new ``cluster_node`` as ``id``.

    Raises:
        ValueError: if ``self.link`` is not ``"S"``, ``"C"`` or ``"A"``, or
            if ``clusters`` holds fewer than two nodes.
    """
    if self.link == "S":
        linkage = min
    elif self.link == "C":
        linkage = max
    elif self.link == "A":
        def linkage(distances):
            return float(sum(distances)) / len(distances)
    else:
        raise ValueError("unknown linkage %r (expected 'S', 'C' or 'A')"
                         % (self.link,))

    if len(clusters) < 2:
        raise ValueError("need at least two clusters to merge")

    # No numeric sentinel: the first pair examined always becomes the
    # initial candidate, so arbitrarily large distances are still handled.
    min_val = None
    imin = jmin = -1
    for i in range(1, len(clusters)):
        # Walk j downwards so ties resolve to the same pair as before.
        for j in range(i - 1, -1, -1):
            d = linkage(clusters[i] - clusters[j])
            if min_val is None or d < min_val:
                min_val, imin, jmin = d, i, j

    new_cluster = cluster_node(id=cluster_id)
    new_cluster.add_children(clusters[imin])
    new_cluster.add_children(clusters[jmin])
    new_cluster.set_height(min_val)

    # Delete by position rather than by value so that nodes comparing equal
    # can never be confused. imin > jmin always holds, so removing the
    # larger index first keeps jmin valid.
    del clusters[imin]
    del clusters[jmin]
    clusters.append(new_cluster)
def hclust(self, genes):
    """Cluster ``genes`` bottom-up until at most ``self.k`` clusters remain.

    Each gene starts as its own leaf ``cluster_node``; ``self.clust`` is then
    called repeatedly, merging one pair of clusters per call.

    Args:
        genes: iterable of records where item 0 is the gene id, item 1 the
            gene description and items 2 onward the expression values
            (each converted to ``float``).

    Returns:
        The list of cluster nodes left after merging. Merging also stops
        once a single cluster remains, so a ``self.k`` below 1 yields the
        single root cluster instead of attempting an impossible merge.
    """
    # Build a real list of floats: on Python 3 ``map`` returns a one-shot
    # iterator, which would be empty after the first pass over the values.
    clusters = [
        cluster_node(g[0], g[1], [float(value) for value in g[2:]])
        for g in genes
    ]

    # Merge identifiers continue numbering after the leaves.
    merge_number = len(clusters) + 1

    # A merge needs two clusters, hence the second condition.
    while len(clusters) > self.k and len(clusters) > 1:
        self.clust(clusters, "cluster_" + str(merge_number))
        merge_number += 1

    return clusters