def test_back_adapter_indel_and_mismatch_occurrence():
    adapter = BackAdapter(
        sequence="GATCGGAAGA",
        max_errors=0.1,
        min_overlap=3,
    )
    match = adapter.match_to("CTGGATCGGAGAGCCGTAGATCGGGAGAGGC")
    # CTGGATCGGA-GAGCCGTAGATCGGGAGAGGC
    #    ||||||| ||      ||||||X|||
    #    GATCGGAAGA      GATCGGAAGA
    assert match.errors == 1
    assert match.matches == 9
    assert match.astart == 0
    assert match.astop == 10
    assert match.rstart == 3
    assert match.rstop == 12
def test_back_adapter_indel_and_exact_occurrence():
    adapter = BackAdapter(
        sequence="GATCGGAAGA",
        max_errors=0.1,
        min_overlap=3,
    )
    match = adapter.match_to("GATCGTGAAGAGATCGGAAGA")
    # We want the leftmost match of these two possible ones:
    # GATCGTGAAGAGATCGGAAGA
    # GATCG-GAAGA
    #            GATCGGAAGA
    assert match.errors == 0
    assert match.matches == 10
    assert match.astart == 0
    assert match.astop == 10
    assert match.rstart == 0
    assert match.rstop == 10
def test_issue_80():
    # This issue turned out to not be an actual issue with the alignment
    # algorithm. The following alignment is found because it has more matches
    # than the 'obvious' one:
    #
    # TCGTATGCCGTCTTC
    # =========X==XX=
    # TCGTATGCCCTC--C
    #
    # This is correct, albeit a little surprising, since an alignment without
    # indels would have only two errors.

    adapter = BackAdapter(sequence="TCGTATGCCGTCTTC",
                          max_errors=0.2,
                          min_overlap=3,
                          read_wildcards=False,
                          adapter_wildcards=False)
    result = adapter.match_to("TCGTATGCCCTCC")
    assert result.errors == 3, result
    assert result.astart == 0, result
    assert result.astop == 15, result
def test_back_adapter_partial_occurrence_in_front():
    adapter = BackAdapter("CTGAATT", max_errors=0, min_overlap=4)
    assert adapter.match_to("AATTGGGGGGG") is None
def test_str():
    a = BackAdapter('ACGT', max_errors=0.1)
    str(a)
    str(a.match_to("TTACGT"))