def test_issue_80():
    """Regression test named after issue 80, which was not an aligner bug.

    The alignment below is the one that is found, because it contains more
    matches than the 'obvious' ungapped one:

        TCGTATGCCGTCTTC
        =========X==XX=
        TCGTATGCCCTC--C

    That is correct, if a little surprising: an alignment without indels
    would have only two errors, whereas this one has three.
    """
    back_adapter = Adapter(
        sequence="TCGTATGCCGTCTTC",
        where=BACK,
        max_error_rate=0.2,
        min_overlap=3,
        read_wildcards=False,
        adapter_wildcards=False,
    )
    short_read = Sequence(name="seq2", sequence="TCGTATGCCCTCC")
    match = back_adapter.match_to(short_read)

    # The match object is used as the assertion message so that a failure
    # shows the alignment that was actually found.
    assert short_read.original_length == 13, match
    assert match.errors == 3, match
    assert match.astart == 0, match
    assert match.astop == 15, match
def test_random_match_probabilities():
    """Check Adapter.random_match_probabilities() for two small adapters.

    The first adapter uses the default GC content, the second one passes
    gc_content=0.4 explicitly.
    """
    # NOTE(review): presumably entry i is the probability that the first i
    # adapter bases match a random sequence -- confirm against Adapter.
    single_base = Adapter('A', BACK)
    assert single_base.random_match_probabilities() == [1.0, 0.25]

    gc_skewed = Adapter('AC', BACK, gc_content=0.4)
    assert gc_skewed.random_match_probabilities() == [1.0, 0.3, 0.06]
def test_statistics():
    """Check that the recorded trimming statistics do not exceed the read.

    After running an AdapterCutter (times=3) over one read, the sum of
    length * count over every adapter's ``lengths_front`` and
    ``lengths_back`` tables must be at most the length of the read.
    """
    read = Sequence('name', 'AAAACCCCAAAA')
    adapters = [Adapter('CCCC', BACK, 0.1)]
    cutter = AdapterCutter(adapters, times=3)
    # Only the statistics accumulated on the adapters are inspected; the
    # trimmed read returned by the cutter is not needed.
    cutter(read)

    trimmed_bp = sum(
        seqlen * count
        for adapter in adapters
        for table in (adapter.lengths_front, adapter.lengths_back)
        for seqlen, count in table.items()
    )
    assert trimmed_bp <= len(read), trimmed_bp
def test_issue_52():
    """Regression test named after issue 52: wildcards() on a hand-built Match.

    The adapter ends in five ``N`` wildcard characters. A ``Match`` is
    constructed directly with fixed coordinates (``match_to`` is not
    called), and ``Match.wildcards()`` is expected to return ``'GGC'``.
    """
    adapter = Adapter(
        sequence='GAACTCCAGTCACNNNNN',
        where=BACK,
        max_error_rate=0.12,
        min_overlap=5,
        read_wildcards=False,
        adapter_wildcards=True)
    read = Sequence(name="abc", sequence='CCCCAGAACTACAGTCCCGGC')
    am = Match(
        astart=0, astop=17, rstart=5, rstop=21, matches=15, errors=2,
        front=None, adapter=adapter, read=read)
    assert am.wildcards() == 'GGC'
    # NOTE(review): a dangling triple-quote used to follow this assertion. It
    # opened a string literal that was never closed, which either breaks
    # parsing or silently swallows the definitions that follow. It looks like
    # the start of an explanatory note about the expected value whose text was
    # lost -- restore that text as a comment here if it can be recovered.
def test_str():
    """Smoke test: str() works on an Adapter, a match result and a
    ColorspaceAdapter.

    The resulting strings are not inspected; the test only fails if one of
    the conversions raises.
    """
    nucleotide_adapter = Adapter('ACGT', where=BACK, max_error_rate=0.1)
    str(nucleotide_adapter)

    seq = Sequence(name='seq', sequence='TTACGT')
    match = nucleotide_adapter.match_to(seq)
    str(match)

    colorspace_adapter = ColorspaceAdapter('0123', where=BACK, max_error_rate=0.1)
    str(colorspace_adapter)