Esempio n. 1
0
def find_overlap(s, t, min_overlap=1):
	"""
	Report how s and t overlap, if they do.

	An Aligner is built on s with max_error_rate=0 and asked to locate t;
	min_overlap is assigned to the aligner's min_overlap attribute before
	the search.

	Returns:

	None if the aligner reports no match.
	Otherwise the match start in s minus the match start in t, i.e.
	0 if s is a prefix of t or t is a prefix of s,
	a positive int giving the index where t starts within s,
	a negative int giving -index where s starts within t.

	>>> find_overlap('ABCDE', 'CDE')
	2
	>>> find_overlap('CDE', 'ABCDEFG')
	-2
	>>> find_overlap('ABC', 'X') is None
	True
	"""
	exact_aligner = Aligner(s, max_error_rate=0)
	exact_aligner.min_overlap = min_overlap
	hit = exact_aligner.locate(t)
	if hit is not None:
		# Only the two start coordinates of the six-element result are used.
		start_in_s, _, start_in_t, _, _, _ = hit
		return start_in_s - start_in_t
	return None
Esempio n. 2
0
def test_n_wildcard_in_ref_matches_n_wildcard_in_query_back():
    """Locate an N-prefixed reference in a query that itself contains an N.

    The aligner is created with wildcard_ref=True, max_error_rate=0 and the
    Where.BACK flags; the complete six-element result tuple is pinned.
    """
    expected = (0, 6, 3, 9, 6, 0)
    aligner = Aligner(
        "NNACGT",
        max_error_rate=0,
        wildcard_ref=True,
        flags=Where.BACK.value,
    )
    assert aligner.locate("AAANTACGTAAA") == expected
Esempio n. 3
0
 def _aligner(self, mismatch):
     """Return the (index_a, index_b) aligner pair, building it on first use.

     mismatch is indexed as mismatch[0] (for index_a) and mismatch[1]
     (for index_b). Each value is divided by the length of its index to
     obtain the error rate passed to Aligner, and each aligner's
     min_overlap is set to the index length minus that value.

     NOTE: the pair is cached on the instance. Once ``_aligner_a`` exists,
     ``mismatch`` is not consulted again on later calls.
     """
     if hasattr(self, "_aligner_a"):
         return self._aligner_a, self._aligner_b

     # Both rates are computed before any attribute is assigned.
     rate_a = float(mismatch[0]) / len(self.index_a)
     rate_b = float(mismatch[1]) / len(self.index_b)

     self._aligner_a = Aligner(self.index_a, rate_a)
     self._aligner_b = Aligner(self.index_b, rate_b)
     self._aligner_a.min_overlap = len(self.index_a) - mismatch[0]
     self._aligner_b.min_overlap = len(self.index_b) - mismatch[1]
     return self._aligner_a, self._aligner_b
Esempio n. 4
0
def locate(reference,
           query,
           max_error_rate,
           flags=SEMIGLOBAL,
           wildcard_ref=False,
           wildcard_query=False,
           min_overlap=1):
    """Build a throwaway Aligner for ``reference`` and locate ``query`` with it.

    All arguments except ``query`` are forwarded to the Aligner constructor
    (``min_overlap`` by keyword, the rest positionally). The value returned
    by ``Aligner.locate(query)`` is passed back unchanged.
    """
    return Aligner(
        reference,
        max_error_rate,
        flags,
        wildcard_ref,
        wildcard_query,
        min_overlap=min_overlap,
    ).locate(query)
Esempio n. 5
0
def test_n_wildcards_not_counted_aligner_back():
    """Check an N-padded reference against several queries with Where.BACK flags.

    The reference is 20 characters long, yet the aligner is expected to
    report an effective_length of 6. Queries are then located one by one
    and the results (or the absence of a match) are pinned.
    """
    ref = 'AGGNNNNNNNNNNNNNNTTC'
    assert len(ref) == 20
    aligner = Aligner(
        ref,
        max_error_rate=0.1,
        wildcard_ref=True,
        flags=Where.BACK.value,
        min_overlap=3,
    )
    assert aligner.effective_length == 6
    assert aligner.locate('TTC') is None
    # The first four result fields are:
    # adapter start, adapter stop, read start, read stop
    coords = slice(None, 4)
    assert aligner.locate('AGG')[coords] == (0, 3, 0, 3)
    assert aligner.locate('AGGCCCCCCC')[coords] == (0, 10, 0, 10)
    assert aligner.locate('ATGCCCCCCC') is None
    assert aligner.locate('AGGCCCCCCCCCCCCCCATC') is None
    # Reference with every N replaced by G, embedded between flanking bases.
    embedded = 'CCC' + ref.replace('N', 'G') + 'AAA'
    assert aligner.locate(embedded) == (0, 20, 3, 23, 20, 0)
Esempio n. 6
0
def test_n_wildcards_not_counted_aligner_back():
    """Exercise an aligner whose reference is mostly N characters.

    Asserts that effective_length is 6 for the 20-character reference, then
    pins the outcome of locating a series of queries in a fixed order.
    """
    ref = 'AGGNNNNNNNNNNNNNNTTC'
    assert len(ref) == 20
    options = dict(
        max_error_rate=0.1,
        wildcard_ref=True,
        flags=Where.BACK.value,
        min_overlap=3,
    )
    aligner = Aligner(ref, **options)
    assert aligner.effective_length == 6
    assert aligner.locate('TTC') is None

    # adapter start, adapter stop, read start, read stop
    short_hit = aligner.locate('AGG')
    assert short_hit[:4] == (0, 3, 0, 3)
    longer_hit = aligner.locate('AGGCCCCCCC')
    assert longer_hit[:4] == (0, 10, 0, 10)

    for rejected in ('ATGCCCCCCC', 'AGGCCCCCCCCCCCCCCATC'):
        assert aligner.locate(rejected) is None

    full_hit = aligner.locate('CCC' + ref.replace('N', 'G') + 'AAA')
    assert full_hit == (0, 20, 3, 23, 20, 0)
Esempio n. 7
0
def test_edit_environment(k, s, environment_func):
    """Cross-check ``environment_func(s, k)`` against three independent oracles.

    1. The generated strings must be unique and equal, as a set, to the
       output of ``naive_edit_environment(s, k)``.
    2. For every generated (string, distance, matches) triple, an Aligner
       on ``s`` (flags=0, min_overlap=len(s), error rate k/len(s) or 0.0
       for empty ``s``) must report the same error and match counts and
       cover both strings from start to end.
    3. ``edit_distance(s, t)`` must equal the reported distance.
    """
    environment = list(environment_func(s, k))
    # Only the strings column is needed here; the other two are re-read
    # per entry in the loop below.
    strings, _distances, _match_counts = zip(*environment)
    naive = set(naive_edit_environment(s, k))
    assert len(set(strings)) == len(strings)
    assert set(strings) == naive

    if s:
        error_rate = k / len(s)
    else:
        error_rate = 0.0
    aligner = Aligner(s,
                      max_error_rate=error_rate,
                      flags=0,
                      min_overlap=len(s))
    for t, dist, m in environment:
        alignment = aligner.locate(t)
        start1, stop1, start2, stop2, matches, errors = alignment
        assert errors == dist
        assert m == matches
        assert start1 == 0
        assert stop1 == len(s)
        assert start2 == 0
        assert stop2 == len(t)
        assert edit_distance(s, t) == dist
        assert m <= len(s), (s, t, dist)
        assert m <= len(t), (s, t, dist)
Esempio n. 8
0
	def test(self):
		"""Smoke test: locate a two-character query with BACK flags.

		No assertion is made on the result; the call only has to complete.
		"""
		Aligner('CTCCAGCTTAGACATATC', 0.1, flags=BACK).locate('CC')
Esempio n. 9
0
	def test_100_percent_error_rate(self):
		"""Smoke test: locate a query with the error rate set to 1.0.

		No assertion is made on the result; the call only has to complete.
		"""
		Aligner('GCTTAGACATATC', 1.0, flags=BACK).locate('CAA')
Esempio n. 10
0
 def test_not_only_n_wildcards(self):
     """An all-N reference with wildcard_ref=True must be rejected.

     Constructing the Aligner is expected to raise ValueError whose first
     argument contains the text "only N wildcards".
     """
     with pytest.raises(ValueError) as excinfo:
         Aligner('NNNNN', 0.1, wildcard_ref=True)
     message = excinfo.value.args[0]
     assert "only N wildcards" in message
Esempio n. 11
0
 def test_100_percent_error_rate(self):
     """Smoke test: locate a query with the error rate set to 1.0.

     No assertion is made on the result; the call only has to complete.
     """
     Aligner('GCTTAGACATATC', 1.0, flags=Where.BACK.value).locate('CAA')
Esempio n. 12
0
 def test(self):
     """Smoke test: locate a two-character query with Where.BACK flags.

     No assertion is made on the result; the call only has to complete.
     """
     Aligner('CTCCAGCTTAGACATATC', 0.1, flags=Where.BACK.value).locate('CC')
Esempio n. 13
0
def locate(reference, query, max_error_rate, flags=SEMIGLOBAL, wildcard_ref=False,
        wildcard_query=False, min_overlap=1):
    """Create an Aligner for ``reference`` and return its ``locate(query)`` result.

    ``max_error_rate``, ``flags``, ``wildcard_ref`` and ``wildcard_query``
    are handed to the Aligner constructor positionally, ``min_overlap`` by
    keyword. The aligner is not kept after the call.
    """
    constructor_args = (reference, max_error_rate, flags, wildcard_ref, wildcard_query)
    one_shot = Aligner(*constructor_args, min_overlap=min_overlap)
    return one_shot.locate(query)
Esempio n. 14
0
 def test_find_empty_in_empty(self):
     """Locating "" in an empty reference yields an all-zero result tuple.

     The aligner is built with error rate 0, flags=0 and min_overlap=0.
     """
     empty_aligner = Aligner("", 0, flags=0, min_overlap=0)
     assert (0, 0, 0, 0, 0, 0) == empty_aligner.locate("")