def test_find_overlap(self):
    """
    Check KMP.find_or_overlap on a handful of simple string pairs.

    Each case gives a left and a right sequence together with the
    number of overlapping characters and the position that
    KMP.find_or_overlap(left, right) is expected to return.
    """
    reference = "abdc sdf"

    # (left, right, expected number of overlaps, expected position)
    cases = (
        # the trailing "df" of the left sequence starts the right one
        (reference, "dfke ", 2, 6),
        # the trailing "abd" of the left sequence starts the right one
        ("dfs abd", reference, 3, 4),
        # the right sequence is fully contained in the left one
        (reference, "dc s", 4, 2),
        # nothing in common: no overlap, position is the left length
        (reference, "pp", 0, len(reference)),
    )

    for left, right, expected_overlaps, expected_position in cases:
        position, n_overlaps = KMP.find_or_overlap(left, right)
        self.assertEqual(n_overlaps, expected_overlaps)
        self.assertEqual(position, expected_position)
def compute_overlaps(fragments_dict, mat):
    """
    Compute the overlap of every ordered pair of distinct fragments and
    record it in a MatchManager.

    A pair (left, right) is skipped when mat.needs_calculation() returns
    a false value for it; otherwise the result of KMP.find_or_overlap()
    is wrapped in a Match and handed to mat.store().

    @param fragments_dict A dictionary of fragments with their ID as key
    @param mat A MatchManager
    """
    log.debug("Computing overlaps")

    # Walk the IDs from greatest to smallest. As noted by the original
    # author, a greater ID corresponds (roughly) to a longer fragment, and
    # that is only guaranteed during the first iteration.
    ordered_ids = sorted(fragments_dict, reverse=True)

    for left_id in ordered_ids:
        for right_id in ordered_ids:
            # a fragment is never matched against itself
            if left_id == right_id:
                continue

            left_len = len(fragments_dict[left_id])
            right_len = len(fragments_dict[right_id])
            if mat.needs_calculation(left_id, right_id, left_len, right_len):
                position, n_chars = KMP.find_or_overlap(
                    fragments_dict[left_id], fragments_dict[right_id]
                )
                log.debug(
                    "Overlap between sequences %s (left) and %s (right): %s",
                    left_id,
                    right_id,
                    n_chars,
                )
                mat.store(Match(left_id, right_id, position, n_chars))