def test_indexed_too_high_k():
    """Building an index with max_errors of 3 and 2 must raise ValueError.

    The exception's first argument is expected to mention
    "Error rate too high".
    """
    with pytest.raises(ValueError) as excinfo:
        # Adapters are created inside the context so that the same set of
        # calls is covered by the ``raises`` check.
        three_errors = PrefixAdapter("ACGTACGT", max_errors=3, indels=False)
        two_errors = PrefixAdapter("AAGGTTCC", max_errors=2, indels=False)
        IndexedPrefixAdapters([three_errors, two_errors])
    message = excinfo.value.args[0]
    assert "Error rate too high" in message
def test_multi_prefix_adapter_with_indels():
    """Indexed prefix adapters with indels enabled pick the expected adapter.

    Neither read starts with an adapter sequence verbatim; each is still
    expected to be assigned to the adapter given in the table below.
    """
    gtac = PrefixAdapter("GTAC", max_errors=1, indels=True)
    tgct = PrefixAdapter("TGCT", max_errors=1, indels=True)
    adapters = [gtac, tgct]
    index = IndexedPrefixAdapters(adapters)

    # (read, position of the adapter that should be reported)
    cases = (
        ("GATACGGG", 0),
        ("TAGCTAA", 1),
    )
    for read, expected_position in cases:
        found = index.match_to(read)
        assert found.adapter is adapters[expected_position]
def test_indexed_prefix_adapters():
    """Each read is assigned to the indexed adapter it begins with."""
    adapters = [PrefixAdapter(seq, indels=False) for seq in ("GAAC", "TGCT")]
    index = IndexedPrefixAdapters(adapters)

    # Reads are listed in the same order as the adapters they start with.
    reads = ("GAACTT", "TGCTAA")
    for read, expected_adapter in zip(reads, adapters):
        found = index.match_to(read)
        assert found.adapter is expected_adapter
def test_multi_prefix_adapter():
    """MultiPrefixAdapter reports the adapter whose sequence starts the read."""
    gaac = PrefixAdapter("GAAC", indels=False)
    tgct = PrefixAdapter("TGCT", indels=False)
    multi = MultiPrefixAdapter([gaac, tgct])

    # Maps each read to the adapter object that must be returned for it.
    expected_by_read = {
        "GAACTT": gaac,
        "TGCTAA": tgct,
    }
    for read, expected_adapter in expected_by_read.items():
        assert multi.match_to(read).adapter is expected_adapter
def test_prefix_with_indels_two_mismatches():
    """An 8-base prefix adapter matches a read with two errors.

    The match is expected to cover the full adapter (0..8) and the first
    eight bases of the read, with 6 matches and 2 errors.
    """
    settings = dict(
        sequence="GCACATTT",
        max_errors=0.3,
        min_overlap=1,
        read_wildcards=False,
        adapter_wildcards=False,
        indels=True,
    )
    adapter = PrefixAdapter(**settings)
    result = adapter.match_to("GCACATCGGAA")

    # Checked one attribute at a time, in this order.
    expected = {
        "errors": 2,
        "matches": 6,
        "astart": 0,
        "astop": 8,
        "rstart": 0,
        "rstop": 8,
    }
    for attribute, value in expected.items():
        assert getattr(result, attribute) == value
def test_reverse_complementer():
    """ReverseComplementer trims reads in either orientation and flags which.

    The first read starts with the first adapter (in lower case) and must be
    trimmed without being marked as reverse-complemented. The second read
    ends with the reverse complement of that adapter; the result must be the
    reverse-complemented remainder and ``info.is_rc`` must be set.
    """
    cutter = AdapterCutter(
        [PrefixAdapter("TTATTTGTCT"), PrefixAdapter("TCCGCACTGG")],
        index=False,
    )
    complementer = ReverseComplementer(cutter)

    # Forward orientation: adapter found directly at the 5' end.
    forward_read = Sequence("r", "ttatttgtctCCAGCTTAGACATATCGCCT")
    forward_info = ModificationInfo(forward_read)
    forward_trimmed = complementer(forward_read, forward_info)
    assert forward_trimmed.sequence == "CCAGCTTAGACATATCGCCT"
    assert not forward_info.is_rc

    # Reverse orientation: adapter only visible after reverse-complementing.
    reverse_read = Sequence("r", "CAACAGGCCACATTAGACATATCGGATGGTagacaaataa")
    reverse_info = ModificationInfo(reverse_read)
    reverse_trimmed = complementer(reverse_read, reverse_info)
    assert reverse_trimmed.sequence == "ACCATCCGATATGTCTAATGTGGCCTGTTG"
    assert reverse_info.is_rc
def test_indexed_prefix_adapters_with_n_wildcard():
    """An ``N`` at any adapter position in the read counts as one error.

    For every position of the adapter, that base is replaced by ``N`` in the
    read (followed by the extra bases "TGCT"); the indexed lookup must still
    report a full-length match with 7 matches and 1 error.
    """
    sequence = "GGTCCAGA"
    index = IndexedPrefixAdapters(
        [PrefixAdapter(sequence, max_errors=1, indels=False)]
    )
    for position, _ in enumerate(sequence):
        # Substitute exactly one base with N, then append trailing read bases.
        bases = list(sequence)
        bases[position] = "N"
        read = "".join(bases) + "TGCT"

        result = index.match_to(read)
        assert isinstance(result, RemoveBeforeMatch)
        assert (result.rstart, result.rstop) == (0, 8)
        assert result.errors == 1
        assert result.matches == 7
def test_adapter_cutter_indexing():
    """AdapterCutter groups these prefix adapters into one index by default.

    With ``index=False`` the adapters must be kept as individual entries.
    """
    sequences = ("ACGAT", "GGAC", "TTTACTTA", "TAACCGGT", "GTTTACGTA", "CGATA")
    adapters = []
    for seq in sequences:
        adapters.append(PrefixAdapter(seq, max_errors=1, indels=False))

    # Default behaviour: everything collapses into a single indexed adapter.
    indexed_cutter = AdapterCutter(adapters)
    assert len(indexed_cutter.adapters) == 1
    (only_adapter,) = indexed_cutter.adapters
    assert isinstance(only_adapter, IndexedPrefixAdapters)

    # Indexing disabled: one entry per input adapter.
    plain_cutter = AdapterCutter(adapters, index=False)
    assert len(plain_cutter.adapters) == len(adapters)
def test_linked_adapter():
    """A LinkedAdapter keeps its parts' min_overlap and trims both ends.

    The read 'AAAACCCCCTTTT' must be reduced to 'CCCCC' while keeping its
    name.
    """
    front = PrefixAdapter('AAAA', min_overlap=4)
    back = BackAdapter('TTTT', min_overlap=3)
    linked = LinkedAdapter(
        front,
        back,
        front_required=True,
        back_required=False,
        name='name',
    )

    # The component adapters must be exposed with their settings intact.
    assert linked.front_adapter.min_overlap == 4
    assert linked.back_adapter.min_overlap == 3

    read = Sequence(name='seq', sequence='AAAACCCCCTTTT')
    match = linked.match_to(read.sequence)
    trimmed = match.trimmed(read)
    assert trimmed.name == 'seq'
    assert trimmed.sequence == 'CCCCC'
def test_indexed_suffix_adapters_incorrect_type():
    """Mixing a PrefixAdapter into IndexedSuffixAdapters raises ValueError."""
    with pytest.raises(ValueError):
        # Built inside the context so the same calls are covered by the check.
        suffix = SuffixAdapter("GAAC", indels=False)
        wrong_kind = PrefixAdapter("TGCT", indels=False)
        IndexedSuffixAdapters([suffix, wrong_kind])
def test_prefix_match_with_n_wildcard_in_read():
    """An adapter starting with ``NNN`` matches reads with or without an N.

    Both reads must yield a match spanning read positions 0 to 7.
    """
    adapter = PrefixAdapter("NNNACGT", indels=False)
    for read in ("TTTACGTAAAA", "NTTACGTAAAA"):
        match = adapter.match_to(read)
        assert match is not None
        assert (match.rstart, match.rstop) == (0, 7)
def test_indexed_very_similar(caplog):
    """Indexing two adapters that differ only in the last base logs a message.

    The captured log text must contain "cannot be assigned uniquely".
    """
    near_duplicates = [
        PrefixAdapter(seq, max_errors=1, indels=False)
        for seq in ("GAAC", "GAAG")
    ]
    # Only the log output produced while building the index is of interest.
    IndexedPrefixAdapters(near_duplicates)
    captured = caplog.text
    assert "cannot be assigned uniquely" in captured
def test_multi_suffix_adapter_incorrect_type():
    """Mixing a PrefixAdapter into MultiSuffixAdapter raises ValueError."""
    with pytest.raises(ValueError):
        # Built inside the context so the same calls are covered by the check.
        suffix = SuffixAdapter("GAAC", indels=False)
        wrong_kind = PrefixAdapter("TGCT", indels=False)
        MultiSuffixAdapter([suffix, wrong_kind])