Esempio n. 1
0
def test_issue_80():
    """Regression test for issue 80: the aligner may prefer an indel alignment.

    The reported behaviour turned out not to be a defect in the alignment
    algorithm. The alignment below is chosen because it contains more
    matches than the 'obvious' gap-free one:

        TCGTATGCCGTCTTC
        =========X==XX=
        TCGTATGCCCTC--C

    That is correct, if a little surprising, given that an alignment without
    indels would have only two errors.
    """
    adapter_sequence = "TCGTATGCCGTCTTC"
    read_sequence = "TCGTATGCCCTCC"

    back_adapter = Adapter(
        sequence=adapter_sequence,
        where=BACK,
        max_error_rate=0.2,
        min_overlap=3,
        read_wildcards=False,
        adapter_wildcards=False,
    )
    read = Sequence(name="seq2", sequence=read_sequence)
    match = back_adapter.match_to(read)

    # The match object is used as the assertion message so that a failure
    # shows the alignment that was actually found.
    assert read.original_length == 13, match
    expected_attributes = (
        ("errors", 3),
        ("astart", 0),
        ("astop", 15),
    )
    for attribute, expected in expected_attributes:
        assert getattr(match, attribute) == expected, match
Esempio n. 2
0
def test_random_match_probabilities():
    """Check the random-match probabilities reported for two small adapters.

    An adapter 'A' is expected to report [1.0, 0.25]; an adapter 'AC' built
    with ``gc_content=0.4`` is expected to report [1.0, 0.3, 0.06].
    """
    import math

    def assert_probabilities(actual, expected):
        # The values are floats, so compare with a tolerance instead of
        # exact equality; the length check keeps zip() from hiding a
        # missing or extra entry.
        assert len(actual) == len(expected), actual
        for got, want in zip(actual, expected):
            assert math.isclose(got, want), actual

    a = Adapter('A', BACK)
    assert_probabilities(a.random_match_probabilities(), [1.0, 0.25])

    a = Adapter('AC', BACK, gc_content=0.4)
    assert_probabilities(a.random_match_probabilities(), [1.0, 0.3, 0.06])
Esempio n. 3
0
def test_statistics():
    """Check that the recorded trimming statistics never exceed the read length.

    A single read is passed through an ``AdapterCutter`` configured with one
    adapter and ``times=3``. Afterwards the ``lengths_front`` and
    ``lengths_back`` tables of every adapter are summed up as
    ``length * count`` and the total must not be larger than the read.
    """
    read = Sequence('name', 'AAAACCCCAAAA')
    adapters = [Adapter('CCCC', BACK, 0.1)]
    cutter = AdapterCutter(adapters, times=3)
    # The return value is not inspected; the call is made for its effect on
    # the adapters' length tables (presumably updated by the cutter -- verify
    # against AdapterCutter).
    cutter(read)
    # TODO make this a lot simpler
    trimmed_bp = sum(
        seqlen * count
        for adapter in adapters
        for lengths in (adapter.lengths_front, adapter.lengths_back)
        for seqlen, count in lengths.items()
    )
    assert trimmed_bp <= len(read), trimmed_bp
Esempio n. 4
0
def test_issue_52():
    # Regression test for issue 52: a hand-built Match between a read and an
    # adapter ending in wildcard characters must report 'GGC' from
    # wildcards().
    wildcard_adapter = Adapter(
        sequence='GAACTCCAGTCACNNNNN',
        where=BACK,
        max_error_rate=0.12,
        min_overlap=5,
        read_wildcards=False,
        adapter_wildcards=True,
    )
    read = Sequence(name="abc", sequence='CCCCAGAACTACAGTCCCGGC')
    # Alignment coordinates and counts are fixed by hand; no alignment is
    # computed in this test.
    alignment = dict(
        astart=0,
        astop=17,
        rstart=5,
        rstop=21,
        matches=15,
        errors=2,
    )
    am = Match(front=None, adapter=wildcard_adapter, read=read, **alignment)
    assert am.wildcards() == 'GGC'
    """
Esempio n. 5
0
def test_random_match_probabilities():
    """Verify random_match_probabilities() for the adapters 'A' and 'AC'.

    Expected values: [1.0, 0.25] for 'A', and [1.0, 0.3, 0.06] for 'AC'
    created with ``gc_content=0.4``.
    """
    import math

    cases = (
        (Adapter('A', BACK), [1.0, 0.25]),
        (Adapter('AC', BACK, gc_content=0.4), [1.0, 0.3, 0.06]),
    )
    for a, expected in cases:
        rmp = a.random_match_probabilities()
        # Floats are compared with a tolerance rather than exact equality;
        # lengths are checked first because zip() stops at the shorter input.
        assert len(rmp) == len(expected), rmp
        assert all(math.isclose(p, e) for p, e in zip(rmp, expected)), rmp
Esempio n. 6
0
def test_str():
    """Smoke test: str() must work on adapters and on a match result.

    Nothing is asserted about the produced text; the test only fails if one
    of the conversions raises.
    """
    adapter = Adapter('ACGT', where=BACK, max_error_rate=0.1)
    str(adapter)

    read = Sequence(name='seq', sequence='TTACGT')
    match = adapter.match_to(read)
    str(match)

    colorspace_adapter = ColorspaceAdapter('0123', where=BACK, max_error_rate=0.1)
    str(colorspace_adapter)