class UPGMA:
    """Cluster sequences from a matrix of pairwise alignment distances.

    The distance matrix is computed once, at construction time, and
    ``run`` hands it to ``HierarchicalClustering``.

    Instance fields:
        seqs    -- the sequences as given; accessed by index and ``len()``.
        alseq   -- aligner object; must offer ``needleman_Wunsch(s1, s2)``
                   and ``recover_align()``.
        matdist -- ``NumMatrix`` of pairwise distances (see
                   ``create_mat_dist``).
    """

    def __init__(self, seqs, alseq):
        """Keep the inputs and build the distance matrix right away."""
        self.seqs = seqs
        self.alseq = alseq
        self.create_mat_dist()

    def create_mat_dist(self):
        """Populate ``self.matdist`` with one distance per sequence pair.

        The distance of a pair is the number of columns of their
        alignment in which the two symbols differ. Only cells (i, j) with
        j >= i are written; the remaining cells keep whatever value
        ``NumMatrix`` initialises them with.
        """
        size = len(self.seqs)
        # distance matrix with dim N x N sequences
        self.matdist = NumMatrix(size, size)
        for row in range(size):
            for col in range(row, size):
                # align the two sequences, then recover the alignment
                self.alseq.needleman_Wunsch(self.seqs[row], self.seqs[col])
                aligned = self.alseq.recover_align()
                # the distance is the number of columns whose symbols differ
                pairs = (aligned.column(pos) for pos in range(len(aligned)))
                mismatches = sum(1 for pair in pairs if pair[0] != pair[1])
                # store the distance in cell (row, col)
                self.matdist.set_value(row, col, mismatches)

    def run(self):
        """Run hierarchical clustering on ``self.matdist``.

        Returns whatever ``HierarchicalClustering.execute_clustering()``
        returns.
        """
        return HierarchicalClustering(self.matdist).execute_clustering()
def create_mat_dist(self):
    """Compute pairwise alignment distances and store them in ``self.matdist``.

    Each pair of sequences from ``self.seqs`` is aligned through
    ``self.alseq`` and the number of alignment columns whose two symbols
    differ is used as the distance. Only cells (i, j) with j >= i are
    written.
    """
    count = len(self.seqs)
    self.matdist = NumMatrix(count, count)
    for first in range(count):
        for second in range(first, count):
            # The aligner keeps the result internally; fetch it afterwards.
            self.alseq.needleman_Wunsch(self.seqs[first], self.seqs[second])
            alignment = self.alseq.recover_align()
            differing = 0
            for position in range(len(alignment)):
                symbols = alignment.column(position)
                differing += 1 if symbols[0] != symbols[1] else 0
            self.matdist.set_value(first, second, differing)
def create_mat_dist(self):
    """Build the distance matrix ``self.matdist`` for ``self.seqs``.

    The distance between two sequences is the number of differing
    symbol pairs in their alignment, as produced by ``self.alseq``.
    Cells below the diagonal are not written by this routine.
    """
    n_seqs = len(self.seqs)
    # one row and one column per sequence
    self.matdist = NumMatrix(n_seqs, n_seqs)
    for i in range(n_seqs):
        # first sequence of the pair stays fixed for the whole inner loop
        seq_a = self.seqs[i]
        for j in range(i, n_seqs):
            seq_b = self.seqs[j]
            # run the alignment and recover its result
            self.alseq.needleman_Wunsch(seq_a, seq_b)
            result = self.alseq.recover_align()
            # distance: count of columns holding two different symbols
            columns = (result.column(k) for k in range(len(result)))
            distance = sum(1 for column in columns if column[0] != column[1])
            # record the distance in cell (i, j)
            self.matdist.set_value(i, j, distance)
class UPGMA:
    """Hierarchical clustering of sequences driven by alignment distances.

    Construction stores the sequences and the aligner, then computes the
    pairwise distance matrix. ``run`` performs the clustering on that
    matrix.
    """

    def __init__(self, seqs, alseq):
        """Store ``seqs`` and ``alseq`` and compute ``self.matdist``."""
        self.seqs = seqs
        self.alseq = alseq
        self.create_mat_dist()

    @staticmethod
    def _count_differences(alignment):
        """Return how many columns of ``alignment`` hold two unequal symbols."""
        total = 0
        for index in range(len(alignment)):
            column = alignment.column(index)
            if column[0] != column[1]:
                total += 1
        return total

    def create_mat_dist(self):
        """Fill ``self.matdist`` with the distance of every sequence pair.

        Pairs (i, j) with j >= i are aligned via ``self.alseq``; the
        number of differing alignment columns is stored at (i, j). Other
        cells are left as created by ``NumMatrix``.
        """
        n = len(self.seqs)
        self.matdist = NumMatrix(n, n)
        for i in range(n):
            left = self.seqs[i]
            for j in range(i, n):
                right = self.seqs[j]
                # The aligner holds its result until recover_align() is called.
                self.alseq.needleman_Wunsch(left, right)
                alignment = self.alseq.recover_align()
                self.matdist.set_value(i, j, self._count_differences(alignment))

    def run(self):
        """Cluster using ``self.matdist`` and return the clustering result."""
        clustering = HierarchicalClustering(self.matdist)
        return clustering.execute_clustering()
def test():
    """Cluster a hard-coded 5 x 5 distance matrix and print the resulting tree."""
    # (row, column, distance) entries; only cells with row < column are set.
    entries = (
        (0, 1, 2), (0, 2, 5), (0, 3, 7), (0, 4, 9),
        (1, 2, 4), (1, 3, 6), (1, 4, 7),
        (2, 3, 4), (2, 4, 6),
        (3, 4, 3),
    )
    distances = NumMatrix(5, 5)
    for row, column, value in entries:
        distances.set_value(row, column, value)
    tree = HierarchicalClustering(distances).execute_clustering()
    tree.print_tree()
def test():
    """Create a 5 x 5 ``NumMatrix``; nothing else is done with it here."""
    # NOTE(review): this looks like the opening of a longer test routine --
    # confirm whether the rest of the body was cut off.
    matrix = NumMatrix(5, 5)