Ejemplo n.º 1
0
def test_n_wildcards_not_counted_aligner_front():
    """Check that N wildcards in the reference are excluded from effective_length.

    The 20-character reference has only six non-N characters, and the aligner
    (wildcard_ref=True, FRONT flags, min_overlap=3) must report an effective
    length of 6. Queries with a mismatch in the non-wildcard part are expected
    to produce no match at all.
    """
    ref = 'AGGNNNNNNNNNNNNNNTTC'
    assert len(ref) == 20
    aligner = Aligner(
        ref,
        max_error_rate=0.1,
        wildcard_ref=True,
        flags=Where.FRONT.value,
        min_overlap=3,
    )
    assert aligner.effective_length == 6

    # The first four result fields are:
    # adapter start, adapter stop, read start, read stop
    suffix_only = aligner.locate('TTC')
    assert suffix_only[:4] == (17, 20, 0, 3)

    suffix_with_mismatch = aligner.locate('TGC')
    assert suffix_with_mismatch is None

    wildcards_and_suffix = aligner.locate('CCCCCCCTTC')
    assert wildcards_and_suffix[:4] == (10, 20, 0, 10)

    wildcards_and_mismatch = aligner.locate('CCCCCCCGTC')
    assert wildcards_and_mismatch is None

    # Full-length occurrence of the reference, flanked by extra bases
    embedded_read = 'CCC' + ref.replace('N', 'G') + 'AAA'
    assert aligner.locate(embedded_read) == (0, 20, 3, 23, 20, 0)
Ejemplo n.º 2
0
def test_n_wildcards_not_counted_aligner_front():
    """Verify that wildcard N positions do not contribute to effective_length.

    Runs a FRONT-flag aligner with wildcard_ref=True over a reference made of
    'AGG', a run of N characters and 'TTC', then checks a series of queries
    against the coordinates (or the absence of a match) they must produce.
    """
    ref = 'AGGNNNNNNNNNNNNNNTTC'
    assert len(ref) == 20
    aligner = Aligner(ref, max_error_rate=0.1, wildcard_ref=True,
                      flags=Where.FRONT.value, min_overlap=3)
    assert aligner.effective_length == 6

    # Each expected tuple is: adapter start, adapter stop, read start, read stop.
    # An expected value of None means the query must not match at all.
    partial_cases = [
        ('TTC', (17, 20, 0, 3)),
        ('TGC', None),
        ('CCCCCCCTTC', (10, 20, 0, 10)),
        ('CCCCCCCGTC', None),
    ]
    for query, expected_coords in partial_cases:
        found = aligner.locate(query)
        if expected_coords is None:
            assert found is None
        else:
            assert found[:4] == expected_coords

    full_query = 'CCC' + ref.replace('N', 'G') + 'AAA'
    full_match = aligner.locate(full_query)
    assert full_match == (0, 20, 3, 23, 20, 0)
Ejemplo n.º 3
0
def test_n_wildcard_in_ref_matches_n_wildcard_in_query_back():
    """Check an error-free BACK-flag match when both sequences contain an N.

    The reference "NNACGT" is located with wildcard_ref=True and no errors
    allowed in a query that itself has an N at the position aligned to the
    reference's first wildcard.
    """
    expected = (0, 6, 3, 9, 6, 0)
    aligner = Aligner(
        "NNACGT",
        max_error_rate=0,
        wildcard_ref=True,
        flags=Where.BACK.value,
    )
    assert aligner.locate("AAANTACGTAAA") == expected
Ejemplo n.º 4
0
def find_overlap(s, t, min_overlap=1):
	"""
	Report how the strings s and t overlap, if they do.

	t is located in s with an Aligner that allows no errors
	(max_error_rate=0); min_overlap is set on that aligner before locating.

	Return value:

	None -- no overlap was detected.
	0 -- s is a prefix of t or t is a prefix of s.
	positive int -- the index where t starts within s.
	negative int -- the negated index where s starts within t.

	>>> find_overlap('ABCDE', 'CDE')
	2
	>>> find_overlap('CDE', 'ABCDEFG')
	-2
	>>> find_overlap('ABC', 'X') is None
	True
	"""
	exact_aligner = Aligner(s, max_error_rate=0)
	exact_aligner.min_overlap = min_overlap
	match = exact_aligner.locate(t)
	if match is not None:
		# Only the two start coordinates matter: their difference is the offset.
		start_in_s, _stop_in_s, start_in_t, _stop_in_t, _matches, _errors = match
		return start_in_s - start_in_t
	return None
Ejemplo n.º 5
0
def locate(reference,
           query,
           max_error_rate,
           flags=SEMIGLOBAL,
           wildcard_ref=False,
           wildcard_query=False,
           min_overlap=1):
    """Locate query in reference using a one-off Aligner.

    Every argument except query is forwarded to the Aligner constructor;
    the return value is whatever Aligner.locate(query) returns.
    """
    return Aligner(
        reference, max_error_rate, flags, wildcard_ref, wildcard_query,
        min_overlap=min_overlap,
    ).locate(query)
Ejemplo n.º 6
0
def test_edit_environment(k, s, environment_func):
    """Cross-check an edit-environment generator against two references.

    environment_func(s, k) must yield (string, distance, matches) triples.
    The strings must be unique and equal to the set produced by
    naive_edit_environment(s, k). Each triple is then compared with what an
    Aligner (flags=0, min_overlap=len(s)) and edit_distance report for the
    same pair of strings.
    """
    environment = list(environment_func(s, k))
    strings, _distances, _match_counts = zip(*environment)
    expected_strings = set(naive_edit_environment(s, k))
    unique_strings = set(strings)
    assert len(unique_strings) == len(strings)
    assert unique_strings == expected_strings

    # Guard against division by zero for an empty s
    if s:
        error_rate = k / len(s)
    else:
        error_rate = 0.0
    aligner = Aligner(
        s,
        max_error_rate=error_rate,
        flags=0,
        min_overlap=len(s),
    )
    for t, dist, m in environment:
        alignment = aligner.locate(t)
        start1, stop1, start2, stop2, aligned_matches, errors = alignment
        assert errors == dist
        assert m == aligned_matches
        # The alignment has to span both strings completely
        assert start1 == 0
        assert stop1 == len(s)
        assert start2 == 0
        assert stop2 == len(t)
        assert edit_distance(s, t) == dist
        assert m <= len(s), (s, t, dist)
        assert m <= len(t), (s, t, dist)
Ejemplo n.º 7
0
	def test(self):
		"""Smoke test: locating 'CC' with BACK flags only has to complete.

		Nothing is asserted about the result.
		"""
		Aligner('CTCCAGCTTAGACATATC', 0.1, flags=BACK).locate('CC')
Ejemplo n.º 8
0
	def test_100_percent_error_rate(self):
		"""Smoke test: locate() with max error rate 1.0 only has to complete.

		Nothing is asserted about the result.
		"""
		full_error_aligner = Aligner('GCTTAGACATATC', 1.0, flags=BACK)
		full_error_aligner.locate('CAA')
Ejemplo n.º 9
0
 def test_100_percent_error_rate(self):
     """Run locate() with a maximum error rate of 1.0; no result is asserted."""
     back_flags = Where.BACK.value
     Aligner('GCTTAGACATATC', 1.0, flags=back_flags).locate('CAA')
Ejemplo n.º 10
0
 def test(self):
     """Run locate() for a short query with BACK flags; no result is asserted."""
     query = 'CC'
     aligner = Aligner('CTCCAGCTTAGACATATC', 0.1, flags=Where.BACK.value)
     aligner.locate(query)
Ejemplo n.º 11
0
def locate(reference, query, max_error_rate, flags=SEMIGLOBAL, wildcard_ref=False,
        wildcard_query=False, min_overlap=1):
    """Build an Aligner for reference and return the result of locating query.

    max_error_rate, flags, wildcard_ref, wildcard_query and min_overlap are
    handed to the Aligner constructor unchanged.
    """
    one_shot_aligner = Aligner(
        reference,
        max_error_rate,
        flags,
        wildcard_ref,
        wildcard_query,
        min_overlap=min_overlap,
    )
    outcome = one_shot_aligner.locate(query)
    return outcome
Ejemplo n.º 12
0
 def test_find_empty_in_empty(self):
     """Locating an empty query in an empty reference must return all zeros."""
     all_zero = (0, 0, 0, 0, 0, 0)
     empty_aligner = Aligner("", 0, flags=0, min_overlap=0)
     assert all_zero == empty_aligner.locate("")