Example #1
0
 def test_natural_cutoff(self):
     """Check sequt.natural_cutoff against three fixed samples.

     All samples use the same search bounds (lo=0.2, hi=0.5); each result
     is compared to its expected cut-off with assertAlmostEqual.
     """
     lo, hi = 0.2, 0.5
     # (sample values, expected cut-off), checked in this order.
     cases = (
         ([0.28, 0.40, 0.50, 0.65], 0.28),
         ([0.1, 0.17, 0.21, 0.30, 0.51], 0.21),
         ([0.1, 0.17, 0.21, 0.28, 0.37, 0.45, 0.51], 0.2),
     )
     for values, expected in cases:
         self.assertAlmostEqual(
             sequt.natural_cutoff(values, lo, hi), expected)
Example #2
0
 def test_natural_cutoff (self):
     """ Verify the cut-off chosen by sequt.natural_cutoff for known inputs.

     Every call uses the bounds 0.2 (lo) and 0.5 (hi); results are compared
     with assertAlmostEqual.
     """
     bounds = (0.2, 0.5)

     def check(values, expected):
         # One assertion per sample; bounds are passed positionally as lo, hi.
         self.assertAlmostEqual(
             sequt.natural_cutoff(values, *bounds), expected)

     check([0.28, 0.40, 0.50, 0.65], 0.28)
     check([0.1, 0.17, 0.21, 0.30, 0.51], 0.21)
     check([0.1, 0.17, 0.21, 0.28, 0.37, 0.45, 0.51], 0.2)
Example #3
0
 def cluster_by_jaccard (self, minlen=5):
     """ Merge strings whose Jaccard distance falls within a per-row cut-off.

     Strings shorter than `minlen` are ignored, both as the row string and
     as a candidate partner.  For every remaining string i, a cut-off is
     obtained from sequt.natural_cutoff over row i of
     self.jaccarrays['distance'] (lo=0.2, hi=0.3); each other string j whose
     distance to i is at or below that cut-off is merged with i through
     self.clusters.merge.
     """
     strings = self.lass
     distances = self.jaccarrays['distance']
     for i, istr in enumerate(strings):
         if len(istr) >= minlen:
             # The cut-off is recomputed for each row, not shared globally.
             cutoff = sequt.natural_cutoff(distances[i], lo=0.2, hi=0.3)
             for j, jstr in enumerate(strings):
                 if j == i or len(jstr) < minlen:
                     continue
                 if distances[i, j] <= cutoff:
                     self.clusters.merge(istr, jstr)
Example #4
0
 def link_across_length_levels (self):
     """ Link each string to its nearest neighbour, whatever its length.

     For string i, the edit distance to every other string j is divided by
     the length of the shorter of the two strings.  The string with the
     smallest such ratio is merged with i (via self.clusters.merge) when
     that ratio does not exceed the cut-off returned by
     sequt.natural_cutoff(ratios, lo=0.2, hi=0.4).
     """
     strings = self.lass
     count = len(strings)
     for i, istr in enumerate(strings):
         row = self.editdist[i]  # edit distances from string i
         own_len = len(istr)
         # 999 is a sentinel: it stays at position i so that a string is
         # not selected as its own nearest neighbour.  Note the sentinel is
         # still part of the array handed to natural_cutoff below.
         ratios = np.ones(count) * 999
         for j in range(count):
             if j != i:
                 ratios[j] = 1.0 * row[j] / min(own_len, len(strings[j]))
         nearest = np.argmin(ratios)
         cutoff = sequt.natural_cutoff(ratios, lo=0.2, hi=0.4)
         if ratios[nearest] <= cutoff:
             self.clusters.merge(istr, strings[nearest])
Example #5
0
 def merge_within_length_level (self):
     """ Merge similar strings that belong to the same length level.

     self.lass.lenbounds maps a length to a (start, stop) pair that is used
     as a half-open index range into self.lass and self.editdist.  Levels
     with fewer than two members are skipped.  For each member i, a cut-off
     is derived from its length-normalised edit distances within the level,
     and every other member j whose edit distance to i is at most
     cut-off * length is merged with i through self.clusters.merge.
     """
     strings = self.lass
     for length, bounds in strings.lenbounds.iteritems():
         first, last = bounds
         if last - first < 2:
             # Nothing to pair up in this level.
             continue
         members = range(first, last)
         for i in members:
             # Cut-off for row i, searched between 0.2 and 0.4 over the
             # distances to all members of the level (i itself included).
             normalised = self.editdist[i, first:last] /float(length)
             cutoff = sequt.natural_cutoff(normalised, lo=0.2, hi=0.4)
             # Convert the relative cut-off back to an absolute distance.
             limit = cutoff * length
             for j in members:
                 if j == i:
                     continue
                 if self.editdist[i, j] <= limit:
                     self.clusters.merge(strings[i], strings[j])