def test_back_adapter_indel_and_mismatch_occurrence():
    """Check that the leftmost of two one-error occurrences is reported.

    The read contains the adapter twice: first with one base missing
    (an indel), then further right with a single mismatch.
    """
    read = "CTGGATCGGAGAGCCGTAGATCGGGAGAGGC"
    back_adapter = BackAdapter(sequence="GATCGGAAGA", max_errors=0.1, min_overlap=3)
    found = back_adapter.match_to(read)
    # The two candidate alignments (read on top, adapter below):
    #
    # CTGGATCGGA-GAGCCGTAGATCGGGAGAGGC
    #    ||||||| ||      ||||||X|||
    #    GATCGGAAGA      GATCGGAAGA
    #
    # The expected values below describe the left one: read[3:12] aligned
    # against the whole adapter with one deleted base.
    assert found.errors == 1
    assert found.matches == 9
    assert found.astart == 0
    assert found.astop == 10
    assert found.rstart == 3
    assert found.rstop == 12
def test_back_adapter_indel_and_exact_occurrence():
    """Check that the leftmost occurrence wins even if a later one is exact.

    The read contains the adapter twice: first with one extra base inserted
    (an indel), then further right as an exact copy.
    """
    adapter = BackAdapter(
        sequence="GATCGGAAGA",
        max_errors=0.1,
        min_overlap=3,
    )
    match = adapter.match_to("GATCGTGAAGAGATCGGAAGA")
    # We want the leftmost match of these two possible ones:
    # GATCGTGAAGAGATCGGAAGA
    # GATCG-GAAGA
    #            GATCGGAAGA
    #
    # The leftmost occurrence is read[0:11]: all ten adapter bases pair with
    # an identical read base and the extra "T" in the read is the single
    # error. A zero-error match at read[0:10] is impossible because
    # "GATCGTGAAG" differs from the adapter, so the expected values must
    # describe the one-error alignment ending at read position 11.
    assert match.errors == 1
    assert match.matches == 10
    assert match.astart == 0
    assert match.astop == 10
    assert match.rstart == 0
    assert match.rstop == 11
def test_issue_80():
    """Pin the alignment reported for the read from issue 80.

    The issue turned out not to be a defect in the alignment algorithm.
    The alignment below is chosen because it has more matches than the
    'obvious' indel-free one:

        TCGTATGCCGTCTTC
        =========X==XX=
        TCGTATGCCCTC--C

    That is correct, if a little surprising, since an alignment without
    indels would have only two errors.
    """
    back_adapter = BackAdapter(
        sequence="TCGTATGCCGTCTTC",
        max_errors=0.2,
        min_overlap=3,
        read_wildcards=False,
        adapter_wildcards=False,
    )
    found = back_adapter.match_to("TCGTATGCCCTCC")
    # The match object is passed as the assertion message so that a failure
    # shows the full alignment that was actually found.
    assert found.errors == 3, found
    assert found.astart == 0, found
    assert found.astop == 15, found
def test_back_adapter_partial_occurrence_in_front():
    """Check that an adapter suffix at the start of the read is no match."""
    # The read begins with "AATT", the last four bases of the adapter.
    read = "AATTGGGGGGG"
    back_adapter = BackAdapter("CTGAATT", max_errors=0, min_overlap=4)
    found = back_adapter.match_to(read)
    assert found is None
def test_str():
    """Smoke test: str() works on an adapter and on a match_to() result."""
    back_adapter = BackAdapter("ACGT", max_errors=0.1)
    found = back_adapter.match_to("TTACGT")
    # Nothing is asserted; the test passes as long as neither call raises.
    str(back_adapter)
    str(found)