Python Adapter Beispiele, cutadapt.adapters.Adapter Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: test_adapters.py Projekt: marcelm/cutadapt

def test_issue_80():
    """Pin the alignment found for the read from issue 80.

    The issue turned out not to be a defect in the alignment algorithm. The
    alignment below is reported because it contains more matches than the
    'obvious' one:

        TCGTATGCCGTCTTC
        =========X==XX=
        TCGTATGCCCTC--C

    That is correct, if a little surprising, because an alignment without
    indels would contain only two errors.
    """
    adapter_options = dict(
        sequence="TCGTATGCCGTCTTC",
        where=Where.BACK,
        remove='suffix',
        max_error_rate=0.2,
        min_overlap=3,
        read_wildcards=False,
        adapter_wildcards=False,
    )
    match = Adapter(**adapter_options).match_to(
        Sequence(name="seq2", sequence="TCGTATGCCCTCC"))

    # The match object is passed as the assertion message to ease debugging.
    assert match.errors == 3, match
    assert match.astart == 0, match
    assert match.astop == 15, match

Beispiel #2

0

Datei anzeigen

Datei: test_adapters.py Projekt: kingdynasty/cutadapt

def test_issue_80():
    """Check the error count and adapter span reported for issue 80's read.

    This was not an actual problem in the alignment algorithm: the alignment
    shown here wins because it has more matches than the 'obvious' one.

        TCGTATGCCGTCTTC
        =========X==XX=
        TCGTATGCCCTC--C

    The result is correct, though somewhat unexpected, since an alignment
    without indels would only have two errors.
    """
    read = Sequence(name="seq2", sequence="TCGTATGCCCTCC")
    adapter = Adapter(
        sequence="TCGTATGCCGTCTTC",
        where=BACK,
        remove='suffix',
        max_error_rate=0.2,
        min_overlap=3,
        read_wildcards=False,
        adapter_wildcards=False,
    )
    match = adapter.match_to(read)

    # Attribute name -> value the match must report.
    expectations = (("errors", 3), ("astart", 0), ("astop", 15))
    for attribute, expected in expectations:
        assert getattr(match, attribute) == expected, match

Beispiel #3

0

Datei anzeigen

Datei: testadapters.py Projekt: MMesbahU/cutadapt

def test_parse_braces_fail():
    """Each malformed brace expression must make Adapter.parse_braces raise ValueError."""
    malformed_expressions = (
        '{', '}', '{}', '{5', '{1}',
        'A{-7}', 'A{', 'A{1', 'N{7', 'AN{7',
        'A{4{}', 'A{4}{3}', 'A{b}', 'A{6X}', 'A{X6}',
    )
    for candidate in malformed_expressions:
        print(candidate)
        # Print the error message as diagnostic output; the actual check is
        # the assert_raises call below.
        try:
            Adapter.parse_braces(candidate)
        except ValueError as error:
            print(error)
        assert_raises(ValueError, lambda: Adapter.parse_braces(candidate))

Beispiel #4

0

Datei anzeigen

def test_parse_braces():
    """Adapter.parse_braces expands each ``X{n}`` into n copies of X."""
    # (input expression, expected expansion)
    cases = [
        ('', ''),
        ('A', 'A'),
        ('A{0}', ''),
        ('A{1}', 'A'),
        ('A{2}', 'AA'),
        ('A{2}C', 'AAC'),
        ('ACGTN{3}TGACCC', 'ACGTNNNTGACCC'),
        ('ACGTN{10}TGACCC', 'ACGTNNNNNNNNNNTGACCC'),
        ('ACGTN{3}TGA{4}CCC', 'ACGTNNNTGAAAACCC'),
        ('ACGTN{0}TGA{4}CCC', 'ACGTTGAAAACCC'),
    ]
    for expression, expanded in cases:
        assert Adapter.parse_braces(expression) == expanded

Beispiel #5

0

Datei anzeigen

Datei: testadapters.py Projekt: MMesbahU/cutadapt

def test_parse_braces():
    """Verify the expansions produced by Adapter.parse_braces for valid input."""
    # Maps each input expression to the string parse_braces must return.
    expected_by_expression = {
        '': '',
        'A': 'A',
        'A{0}': '',
        'A{1}': 'A',
        'A{2}': 'AA',
        'A{2}C': 'AAC',
        'ACGTN{3}TGACCC': 'ACGTNNNTGACCC',
        'ACGTN{10}TGACCC': 'ACGTNNNNNNNNNNTGACCC',
        'ACGTN{3}TGA{4}CCC': 'ACGTNNNTGAAAACCC',
        'ACGTN{0}TGA{4}CCC': 'ACGTTGAAAACCC',
    }
    for source, result in expected_by_expression.items():
        assert Adapter.parse_braces(source) == result

Beispiel #6

0

Datei anzeigen

def test_issue_265():
    """Reading ``matches`` on a match from a non-anchored linked adapter must not crash."""
    linked = LinkedAdapter(
        Adapter('GGG', where=Where.FRONT),
        Adapter('TTT', where=Where.BACK),
        front_required=False,
        back_required=False,
        name='name',
    )
    read = Sequence('name', 'AAAATTTT')
    match = linked.match_to(read)
    assert match.matches == 3

Beispiel #7

0

Datei anzeigen

def test_parse_braces_fail():
    """Adapter.parse_braces must reject every malformed expression with ValueError."""
    invalid = [
        '{', '}', '{}', '{5', '{1}', 'A{-7}', 'A{', 'A{1',
        'N{7', 'AN{7', 'A{4{}', 'A{4}{3}', 'A{b}', 'A{6X}', 'A{X6}',
    ]
    for expression in invalid:

        def parse_current():
            return Adapter.parse_braces(expression)

        print(expression)
        # First attempt only echoes the error text; the assertion follows.
        try:
            parse_current()
        except ValueError as exc:
            print(exc)
        assert_raises(ValueError, parse_current)

Beispiel #8

0

Datei anzeigen

def test_add_adapter_statistics():
    """In-place addition (``+=``) of two adapter statistics merges their back-end counts."""

    def new_statistics():
        adapter = Adapter('A', name='name', where=Where.BACK, max_error_rate=0.1)
        return adapter.create_statistics()

    def fill(end_statistics, adjacent_bases, errors):
        # Assign item by item, exactly as individual statements would.
        for base, count in adjacent_bases.items():
            end_statistics.adjacent_bases[base] = count
        for length, counts in errors.items():
            for n_errors, count in counts.items():
                end_statistics.errors[length][n_errors] = count

    stats = new_statistics()
    fill(
        stats.back,
        adjacent_bases={'A': 7, 'C': 19, 'G': 23, 'T': 42, '': 45},
        errors={
            10: {0: 100, 1: 11, 2: 3},
            20: {0: 600, 1: 66, 2: 6},
        },
    )

    stats2 = new_statistics()
    fill(
        stats2.back,
        adjacent_bases={'A': 43, 'C': 31, 'G': 27, 'T': 8, '': 5},
        errors={
            10: {0: 234, 1: 14, 3: 5},
            15: {0: 90, 1: 17, 2: 2},
        },
    )

    stats += stats2
    merged = stats.back

    # Every adjacent-base count adds up to 50 (7+43, 19+31, 23+27, 42+8, 45+5).
    assert merged.adjacent_bases == {'A': 50, 'C': 50, 'G': 50, 'T': 50, '': 50}
    # Only length 10 occurs on both sides; lengths 15 and 20 are carried over.
    expected_errors = {
        10: {0: 334, 1: 25, 2: 3, 3: 5},
        15: {0: 90, 1: 17, 2: 2},
        20: {0: 600, 1: 66, 2: 6},
    }
    assert merged.errors == expected_errors

Beispiel #9

0

Datei anzeigen

Datei: test_adapters.py Projekt: kingdynasty/cutadapt

def test_random_match_probabilities():
    """Check random_match_probabilities() for 1- and 4-base adapters at arguments 0.5 and 0.2."""
    # Expected lists for the four-base adapters; one entry per prefix length 0..4.
    expected_at_half = [1, 0.25, 0.25**2, 0.25**3, 0.25**4]
    expected_at_fifth = [1, 0.4, 0.4*0.1, 0.4*0.1*0.4, 0.4*0.1*0.4*0.1]

    single_base = Adapter('A', where=BACK, max_error_rate=0.1).create_statistics()
    assert single_base.back.random_match_probabilities(0.5) == [1, 0.25]
    assert single_base.back.random_match_probabilities(0.2) == [1, 0.4]

    for sequence in ('ACTG', 'XMWH'):
        back_adapter = Adapter(sequence, where=BACK, max_error_rate=0.1).create_statistics()
        assert back_adapter.back.random_match_probabilities(0.5) == expected_at_half
        assert back_adapter.back.random_match_probabilities(0.2) == expected_at_fifth

    front_adapter = Adapter('GTCA', where=FRONT, max_error_rate=0.1).create_statistics()
    assert front_adapter.front.random_match_probabilities(0.5) == expected_at_half
    assert front_adapter.front.random_match_probabilities(0.2) == expected_at_fifth

Beispiel #10

0

Datei anzeigen

Datei: testadapters.py Projekt: jgibson2/cutadapt-vbim

def test_info_record():
    """Check the tuple returned by Match.get_info_record() for a hand-built match."""
    read = Sequence(name="abc", sequence='CCCCAGAACTACAGTCCCGGC')
    adapter = Adapter(
        sequence='GAACTCCAGTCACNNNNN',
        where=BACK,
        max_error_rate=0.12,
        min_overlap=5,
        read_wildcards=False,
        adapter_wildcards=True,
        name="Foo",
    )
    match = Match(
        astart=0, astop=17, rstart=5, rstop=21, matches=15, errors=2,
        remove_before=False, adapter=adapter, read=read,
    )

    expected_record = (
        "abc",               # read name
        2,                   # errors
        5,                   # rstart
        21,                  # rstop
        'CCCCA',             # read[:5]
        'GAACTACAGTCCCGGC',  # read[5:21]
        '',                  # read[21:]
        'Foo',               # adapter name
        '',
        '',
        '',
    )
    assert match.get_info_record() == expected_record

Beispiel #11

0

Datei anzeigen

def test_linked_adapter():
    """A linked adapter keeps each part's min_overlap and trims both ends of a read."""
    linked_adapter = LinkedAdapter(
        Adapter('AAAA', where=Where.PREFIX, min_overlap=4),
        Adapter('TTTT', where=Where.BACK, min_overlap=3),
        front_required=True,
        back_required=False,
        name='name',
    )
    assert linked_adapter.front_adapter.min_overlap == 4
    assert linked_adapter.back_adapter.min_overlap == 3

    read = Sequence(name='seq', sequence='AAAACCCCCTTTT')
    match = linked_adapter.match_to(read)
    trimmed = match.trimmed()
    assert trimmed.name == 'seq'
    assert trimmed.sequence == 'CCCCC'

Beispiel #12

0

Datei anzeigen

def test_end_trim_with_mismatch():
    """
    Cover the not-so-obvious case in which an adapter of length 13 is trimmed
    from the end of a read with an overlap of 9 and a single deletion.
    Here the algorithm starts with 10 bases of the adapter to get the hit, so
    the match counts as good. An insertion or a substitution at the same
    position does not give a match.
    """
    adapter = Adapter('TCGATCGATCGAT', BACK, max_error_rate=0.1)

    # First read: the adapter is found and removed.
    first_cutter = AdapterCutter([adapter], times=1)
    first_result = first_cutter(Sequence('foo1', 'AAAAAAAAAAATCGTCGATC'), [])

    assert first_result.sequence == 'AAAAAAAAAAA'
    back_statistics = first_cutter.adapter_statistics[adapter].back
    assert back_statistics.lengths == {9: 1}
    # One error is recorded at length 9 although the number of allowed
    # mismatches at length 9 is 0.
    assert first_cutter.adapter_statistics[adapter].back.errors[9][1] == 1

    # Second read: nothing is trimmed and no match length is recorded.
    second_read = Sequence('foo2', 'AAAAAAAAAAATCGAACGA')
    second_cutter = AdapterCutter([adapter], times=1)
    second_result = second_cutter(second_read, [])

    assert second_result.sequence == second_read.sequence
    assert second_cutter.adapter_statistics[adapter].back.lengths == {}

Beispiel #13

0

Datei anzeigen

Datei: trimSeq.py Projekt: LiuzLab/CRISPRcloud-standalone

def trim(sampleFileName, ADAPT_FRONT="GTGGAAAGGACGAAACACC", max_err=0.1, max_length=24):
    """Cut a front adapter from every read in a FASTQ file and return sequence prefixes.

    Args:
        sampleFileName: Path of the input file. A name ending in 'gz'
            (compared case-insensitively) is opened with gzip.open, anything
            else with the built-in open.
        ADAPT_FRONT: Adapter sequence passed to Adapter together with FRONT.
        max_err: Third positional argument passed to Adapter.
        max_length: Accepted but not used by this function.

    Returns:
        A list with the first 20 characters of each read's sequence after
        the read has been passed through the adapter cutter.
    """
    cutter_front = AdapterCutter([Adapter(ADAPT_FRONT, FRONT, max_err)], times=1)

    is_gzipped = sampleFileName.lower().endswith('gz')
    opener = gzip.open if is_gzipped else open
    with opener(sampleFileName, "r") as inp:
        return [cutter_front(read).sequence[:20] for read in readFastq(inp)]

Beispiel #14

0

Datei anzeigen

Datei: testtrim.py Projekt: vincenzopennone/BacPipe

def test_statistics():
    """The base pairs counted in the adapters' length statistics never exceed the read length."""
    read = Sequence('name', 'AAAACCCCAAAA')
    adapters = [Adapter('CCCC', BACK, 0.1)]
    cutter = AdapterCutter(adapters, times=3)
    # Only the statistics updated by this call matter; its return value is not used.
    cutter(read)

    # TODO make this a lot simpler
    trimmed_bp = sum(
        seqlen * count
        for adapter in adapters
        for lengths in (adapter.lengths_front, adapter.lengths_back)
        for seqlen, count in lengths.items()
    )
    assert trimmed_bp <= len(read), trimmed_bp

Beispiel #15

0

Datei anzeigen

def test_issue_52():
    """Check the wildcard bases reported by Match.wildcards() for a hand-built match (issue 52)."""
    adapter = Adapter(
        sequence='GAACTCCAGTCACNNNNN',
        where=BACK,
        max_error_rate=0.12,
        min_overlap=5,
        read_wildcards=False,
        adapter_wildcards=True)
    read = Sequence(name="abc", sequence='CCCCAGAACTACAGTCCCGGC')
    am = Match(astart=0, astop=17, rstart=5, rstop=21, matches=15, errors=2, front=None, adapter=adapter, read=read)
    assert am.wildcards() == 'GGC'
    # NOTE(review): an unterminated triple-quoted string used to follow this
    # assertion and made the function a syntax error; whatever text it was
    # meant to hold is not available here.

Beispiel #16

0

Datei anzeigen

def test_statistics():
    """The base pairs counted in the cutter's length statistics never exceed the read length."""
    read = Sequence('name', 'AAAACCCCAAAA')
    adapters = [Adapter('CCCC', BACK, max_error_rate=0.1)]
    cutter = AdapterCutter(adapters, times=3)
    # Only the statistics updated by this call matter; its return value is not used.
    cutter(read, [])

    # TODO make this a lot simpler
    trimmed_bp = 0
    for adapter in adapters:
        statistics = cutter.adapter_statistics[adapter]
        for lengths in (statistics.front.lengths, statistics.back.lengths):
            for seqlen, count in lengths.items():
                trimmed_bp += seqlen * count
    assert trimmed_bp <= len(read), trimmed_bp

Beispiel #17

0

Datei anzeigen

def test_anywhere_with_errors():
    """An ANYWHERE adapter is trimmed from either end of the read, even with one error."""
    adapter = Adapter('CCGCATTTAG', ANYWHERE, max_error_rate=0.1)
    # Every read below must be reduced to this same sequence.
    expected_trimmed = 'AACCGGTT'
    reads = [
        'AACCGGTTccgcatttagGATC',
        'AACCGGTTccgcgtttagGATC',  # one mismatch
        'AACCGGTTccgcatttag',
        'ccgcatttagAACCGGTT',
        'ccgtatttagAACCGGTT',  # one mismatch
        'ccgatttagAACCGGTT',  # one deletion
    ]
    for seq in reads:
        # A fresh cutter is built for each read.
        cutter = AdapterCutter([adapter], times=1)
        result = cutter(Sequence('foo', seq), [])
        assert result.sequence == expected_trimmed

Beispiel #18

0

Datei anzeigen

Datei: test_adapters.py Projekt: marcelm/cutadapt

def test_no_indels_empty_read(where):
    """Issue #376: matching an indel-free adapter against an empty read must not raise."""
    Adapter('ACGT', where=where, indels=False).match_to(Sequence('name', ''))

Beispiel #19

0

Datei anzeigen

 def parse_adapters(adapter, error_rate=None):
     """Build Adapter objects from a comma-separated adapter specification.

     Args:
         adapter: String that is split on ',' and handed to gather_adapters
             as its first argument (the other two arguments are empty lists).
         error_rate: Passed to every Adapter as its third positional argument.

     Returns:
         A list with one Adapter per (name, seq, where) triple that
         gather_adapters yields.
     """
     specifications = gather_adapters(adapter.split(','), [], [])
     return [
         Adapter(seq, where, error_rate, name=name)
         for name, seq, where in specifications
     ]

Beispiel #20

0

Datei anzeigen

Datei: testadapters.py Projekt: pdimitrov/cutadapt

def test_str():
    """Smoke test: str() works on an adapter, on a match_to() result and on a colorspace adapter."""
    adapter = Adapter('ACGT', where=BACK, max_error_rate=0.1)
    str(adapter)

    read = Sequence(name='seq', sequence='TTACGT')
    str(adapter.match_to(read))

    colorspace_adapter = ColorspaceAdapter('0123', where=BACK, max_error_rate=0.1)
    str(colorspace_adapter)

Beispiel #21

0

Datei anzeigen

Datei: testadapters.py Projekt: MMesbahU/cutadapt

def test_str():
    """Converting adapters and a match_to() result to str must not raise."""
    nucleotide_adapter = Adapter('ACGT', where=BACK, max_error_rate=0.1)
    str(nucleotide_adapter)
    match = nucleotide_adapter.match_to(Sequence(name='seq', sequence='TTACGT'))
    str(match)
    # Same check for the colorspace variant.
    str(ColorspaceAdapter('0123', where=BACK, max_error_rate=0.1))

Beispiel #22

0

Datei anzeigen

def test_str():
    """Smoke test: str() works on an adapter and on the result of match_to()."""
    adapter = Adapter('ACGT', where=Where.BACK, remove='suffix', max_error_rate=0.1)
    str(adapter)
    read = Sequence(name='seq', sequence='TTACGT')
    match = adapter.match_to(read)
    str(match)

Beispiel #23

0

Datei anzeigen

def test_no_indels_empty_read(where):
    """Regression test for issue #376: match an adapter with indels disabled to an empty read."""
    empty_read = Sequence('name', '')
    no_indel_adapter = Adapter('ACGT', where=where, indels=False)
    # The test passes as long as this call does not raise.
    no_indel_adapter.match_to(empty_read)

Beispiel #24

0

Datei anzeigen

Datei: test_adapters.py Projekt: marcelm/cutadapt

def test_str():
    """str() on an adapter and on its match_to() result must not raise."""
    suffix_adapter = Adapter(
        'ACGT',
        where=Where.BACK,
        remove='suffix',
        max_error_rate=0.1,
    )
    str(suffix_adapter)
    str(suffix_adapter.match_to(Sequence(name='seq', sequence='TTACGT')))