Beispiel #1
0
 def test_find_overlap(self):
     """ Check KMP.find_or_overlap on a handful of short, hand-written strings:
         two suffix/prefix overlaps, one full containment and one complete miss
     """
     base = "abdc sdf"

     def check(left, right, expected_chars, expected_position):
         # Same order as always: first the number of matching characters,
         # then the position reported for `left`.
         position, n_overlaps = KMP.find_or_overlap(left, right)
         self.assertEqual(n_overlaps, expected_chars)
         self.assertEqual(position, expected_position)

     # "df" closes the left string and opens the right one
     check(base, "dfke ", 2, 6)
     # "abd" closes the left string and opens the right one
     check("dfs abd", base, 3, 4)
     # the right string is entirely contained in the left one
     check(base, "dc s", 4, 2)
     # nothing in common: zero characters, position equal to the left length
     check(base, "pp", 0, len(base))
Beispiel #2
0
def compute_overlaps(fragments_dict, mat):
    """ Fill a MatchManager with the overlap of every ordered pair of fragments

        Each pair of distinct IDs (left, right) is offered to the manager first;
        only the pairs it says need calculation are run through
        KMP.find_or_overlap, and the resulting Match is stored in the manager.
        @param fragments_dict A dictionary of fragments with their ID as key
        @param mat A MatchManager
    """
    log.debug("Computing overlaps")
    # Walk the IDs from greatest to smallest. A greater ID corresponds (roughly)
    # to a longer fragment, though that is only guaranteed during the first
    # iteration.
    ordered_ids = sorted(fragments_dict.keys())[::-1]
    for left_id in ordered_ids:
        for right_id in ordered_ids:
            # a fragment is never matched against itself
            if left_id == right_id:
                continue
            left = fragments_dict[left_id]
            right = fragments_dict[right_id]
            # let the manager decide, from the IDs and lengths, whether this
            # pair has to be (re)computed
            if not mat.needs_calculation(left_id, right_id, len(left), len(right)):
                continue
            position, n_chars = KMP.find_or_overlap(left, right)
            log.debug("Overlap between sequences %s (left) and %s (right): %s", left_id, right_id, n_chars)
            mat.store(Match(left_id, right_id, position, n_chars))