Ejemplo n.º 1
0
class UPGMA:
    """Derive a tree from sequences via pairwise-alignment distances.

    The constructor keeps the sequences and the aligner object, then
    immediately fills ``self.matdist`` by calling ``create_mat_dist``.
    """

    def __init__(self, seqs, alseq):
        self.seqs = seqs
        self.alseq = alseq
        self.create_mat_dist()

    def create_mat_dist(self):
        """Build ``self.matdist`` from pairwise alignments of ``self.seqs``.

        For every index pair ``(row, col)`` with ``col >= row`` the two
        sequences are aligned through ``self.alseq`` and the distance stored
        with ``set_value`` is the number of alignment columns whose two
        symbols differ.
        """
        total = len(self.seqs)
        # Square matrix: one row and one column per sequence.
        self.matdist = NumMatrix(total, total)
        for row in range(total):
            for col in range(row, total):
                first = self.seqs[row]
                second = self.seqs[col]
                # Run the alignment, then fetch the aligned result.
                self.alseq.needleman_Wunsch(first, second)
                alignment = self.alseq.recover_align()
                # Distance = count of columns holding two different symbols.
                mismatches = sum(
                    1
                    for pair in map(alignment.column, range(len(alignment)))
                    if pair[0] != pair[1]
                )
                self.matdist.set_value(row, col, mismatches)

    def run(self):
        """Run ``HierarchicalClustering`` on the matrix and return its result."""
        clusterer = HierarchicalClustering(self.matdist)
        return clusterer.execute_clustering()
 def create_mat_dist(self):
     """Compute the pairwise distance matrix and store it in ``self.matdist``.

     Each pair ``(i, j)`` with ``j >= i`` is aligned with
     ``self.alseq.needleman_Wunsch``; the distance written through
     ``set_value`` is the number of alignment columns whose two symbols
     differ.
     """
     size = len(self.seqs)
     self.matdist = NumMatrix(size, size)
     # Lazily enumerate the diagonal and everything above it, row by row.
     index_pairs = ((i, j) for i in range(size) for j in range(i, size))
     for i, j in index_pairs:
         first = self.seqs[i]
         second = self.seqs[j]
         self.alseq.needleman_Wunsch(first, second)
         aligned = self.alseq.recover_align()
         differing = 0
         for pos in range(len(aligned)):
             pair = aligned.column(pos)
             differing += 1 if pair[0] != pair[1] else 0
         self.matdist.set_value(i, j, differing)
Ejemplo n.º 3
0
 def create_mat_dist(self):
     """Populate ``self.matdist`` with alignment-based distances.

     The matrix has one row and one column per entry of ``self.seqs``.
     Only cells ``(a, b)`` with ``b >= a`` are written by this method.
     """
     seq_count = len(self.seqs)
     self.matdist = NumMatrix(seq_count, seq_count)
     for a in range(seq_count):
         for b in range(a, seq_count):
             # Pick the pair of sequences and align them.
             left = self.seqs[a]
             right = self.seqs[b]
             self.alseq.needleman_Wunsch(left, right)
             # Fetch the alignment produced by the call above.
             alignment = self.alseq.recover_align()
             # The distance is the number of columns with differing symbols.
             distance = 0
             for idx in range(len(alignment)):
                 column = alignment.column(idx)
                 symbols_differ = column[0] != column[1]
                 if symbols_differ:
                     distance += 1
             # Record the distance for this pair.
             self.matdist.set_value(a, b, distance)
class UPGMA:
    """Cluster sequences using distances taken from pairwise alignments.

    ``seqs`` is the indexable collection of sequences and ``alseq`` the
    aligner object; the distance matrix is built during construction.
    """

    def __init__(self, seqs, alseq):
        self.seqs = seqs
        self.alseq = alseq
        self.create_mat_dist()

    def create_mat_dist(self):
        """Create ``self.matdist`` and fill cells ``(i, j)`` for ``j >= i``.

        Each value is the number of columns, in the alignment of sequence
        ``i`` against sequence ``j``, whose two symbols are different.
        """

        def mismatch_count(alignment):
            # Walk the alignment column by column, counting unequal pairs.
            count = 0
            position = 0
            length = len(alignment)
            while position < length:
                column = alignment.column(position)
                if column[0] != column[1]:
                    count += 1
                position += 1
            return count

        n_seqs = len(self.seqs)
        self.matdist = NumMatrix(n_seqs, n_seqs)
        for i in range(n_seqs):
            for j in range(i, n_seqs):
                seq_a = self.seqs[i]
                seq_b = self.seqs[j]
                self.alseq.needleman_Wunsch(seq_a, seq_b)
                distance = mismatch_count(self.alseq.recover_align())
                self.matdist.set_value(i, j, distance)

    def run(self):
        """Return the result of ``HierarchicalClustering`` on ``self.matdist``."""
        return HierarchicalClustering(self.matdist).execute_clustering()
Ejemplo n.º 5
0
def test():
    """Cluster a fixed 5x5 distance matrix and call ``print_tree`` on the result."""
    matrix = NumMatrix(5, 5)
    # (row, column, distance) triples, written in this exact order.
    distances = (
        (0, 1, 2),
        (0, 2, 5),
        (0, 3, 7),
        (0, 4, 9),
        (1, 2, 4),
        (1, 3, 6),
        (1, 4, 7),
        (2, 3, 4),
        (2, 4, 6),
        (3, 4, 3),
    )
    for row, col, dist in distances:
        matrix.set_value(row, col, dist)
    clustering = HierarchicalClustering(matrix)
    tree = clustering.execute_clustering()
    tree.print_tree()
Ejemplo n.º 6
0
def test():
    m = NumMatrix(5,5)