Beispiel #1
0
    def test_trim_seqs():
        """Lowercase stretches are trimmed from SeqRecord and SeqItem reads.

        Every case feeds a packet through TrimLowercasedLetters and then
        through TrimOrMask, and checks what is left under SEQS_PASSED (and,
        for the paired cases, under ORPHAN_SEQS).
        """
        def new_packet(groups):
            # Packet with the given read groups and no orphans yet.
            return {SEQS_PASSED: groups, ORPHAN_SEQS: []}

        def passed_strs(packet):
            # Flatten the groups under SEQS_PASSED into plain strings.
            return [get_str_seq(read)
                    for group in packet[SEQS_PASSED] for read in group]

        # Single SeqRecord reads. The expected output has no entry for the
        # all-lowercase 'actg' read.
        raw_reads = ['aaCTTTC', 'CTTCaa', 'aaCTCaa', 'actg', 'AC']
        groups = [[SeqWrapper(SEQRECORD, SeqRecord(Seq(raw)), None)]
                  for raw in raw_reads]
        lowercase_filter = TrimLowercasedLetters()
        trimmer = TrimOrMask()
        # pylint: disable=W0141
        packet = trimmer(lowercase_filter(new_packet(groups)))
        assert passed_strs(packet) == ['CTTTC', 'CTTC', 'CTC', 'AC']

        # A single fasta SeqItem, reusing the same filter instances.
        item = SeqItem('s', ['>s\n', 'aaCTTTC\n'])
        groups = [[SeqWrapper(SEQITEM, item, 'fasta')]]
        packet = trimmer(lowercase_filter(new_packet(groups)))
        assert passed_strs(packet) == ['CTTTC']

        # with pairs
        pair_lines = [
            [('s.f', ['>s.f\n', 'aaCTTTC\n']),
             ('s.r', ['>s.r\n', 'aaCTTTC\n'])],
            [('s1.f', ['>s1.f\n', 'aa\n']),
             ('s1.r', ['>s1.r\n', 'aaCTTTC\n'])],
        ]
        items = [[SeqItem(name, lines) for name, lines in pair]
                 for pair in pair_lines]
        pairs = [[SeqWrapper(SEQITEM, member, 'fasta') for member in pair]
                 for pair in items]
        lowercase_filter = TrimLowercasedLetters()
        trimmer = TrimOrMask()
        # pylint: disable=W0141
        packet = trimmer(lowercase_filter(new_packet(pairs)))
        passed = passed_strs(packet)
        orphans = [get_str_seq(read) for read in packet[ORPHAN_SEQS]]
        # One read ends up as an orphan; the other pair passes intact.
        assert orphans == ['CTTTC']
        assert ['CTTTC', 'CTTTC'] == passed

        # no drag: the same wrapped pairs go through fresh filter instances
        # and the orphan is identified by name this time.
        lowercase_filter = TrimLowercasedLetters()
        trimmer = TrimOrMask()
        # pylint: disable=W0141
        packet = trimmer(lowercase_filter(new_packet(pairs)))
        passed = passed_strs(packet)
        orphan_names = [get_name(read) for read in packet[ORPHAN_SEQS]]
        assert orphan_names == ['s1.r']
        assert ['CTTTC', 'CTTTC'] == passed
Beispiel #2
0
    def test_trimming(self):
        """Sequences are trimmed according to their trimming recommendations.

        One wrapped SeqRecord is pushed through the same TrimOrMask instance
        several times. Before each run a different set of 'vector' and
        'quality' segments is stored under TRIMMING_RECOMMENDATIONS in the
        record's annotations.
        """
        raw_seq = 'gggtctcatcatcaggg'.upper()
        seq = SeqRecord(Seq(raw_seq),
                        annotations={TRIMMING_RECOMMENDATIONS: {}})
        seq = SeqWrapper(SEQRECORD, seq, None)
        seqs = [seq]
        trim_packet = {SEQS_PASSED: [seqs], ORPHAN_SEQS: []}

        trim_rec = get_annotations(seq)[TRIMMING_RECOMMENDATIONS]
        seq_trimmer = TrimOrMask()

        # NOTE(review): the expected strings below are consistent with the
        # segments being 0-based (start, end) pairs with an inclusive end --
        # confirm against TrimOrMask.

        # Two vector segments: the stretch between them is what remains.
        trim_rec['vector'] = [(0, 3), (8, 13)]
        # The recommendations are stored again before every run so the
        # annotation is present whatever the previous run did to it.
        get_annotations(seq)[TRIMMING_RECOMMENDATIONS] = trim_rec
        trim_packet2 = seq_trimmer(trim_packet)
        res = [get_str_seq(s) for l in trim_packet2[SEQS_PASSED] for s in l]
        assert res == ['CTCA']

        # A shorter left segment leaves a longer remaining stretch.
        trim_rec['vector'] = [(0, 0), (8, 13)]
        get_annotations(seq)[TRIMMING_RECOMMENDATIONS] = trim_rec
        trim_packet2 = seq_trimmer(trim_packet)
        res = [get_str_seq(s) for l in trim_packet2[SEQS_PASSED] for s in l]
        assert res == ['GGTCTCA']

        # Vector and quality segments combined: no sequence passes.
        trim_rec['vector'] = [(0, 1), (8, 12)]
        trim_rec['quality'] = [(1, 8), (13, 17)]
        get_annotations(seq)[TRIMMING_RECOMMENDATIONS] = trim_rec
        trim_packet2 = seq_trimmer(trim_packet)
        assert not trim_packet2[SEQS_PASSED]

        # An empty 'quality' list gives the vector-only result again.
        trim_rec['vector'] = [(0, 0), (8, 13)]
        trim_rec['quality'] = []
        get_annotations(seq)[TRIMMING_RECOMMENDATIONS] = trim_rec
        trim_packet2 = seq_trimmer(trim_packet)
        res = [get_str_seq(s) for l in trim_packet2[SEQS_PASSED] for s in l]
        assert res == ['GGTCTCA']

        # The recommendations must not remain on the trimmed sequence.
        trimmed_seq = trim_packet2[SEQS_PASSED][0][0]
        assert TRIMMING_RECOMMENDATIONS not in get_annotations(trimmed_seq)
Beispiel #3
0
    def test_quality_trimming(self):
        """Reads are trimmed by TrimByQuality followed by TrimOrMask.

        Every case uses window=5 and varies the threshold and the trim_left /
        trim_right switches; the qualities of the surviving read are compared
        with the expected values.
        """
        trimmer = TrimOrMask()

        def wrap_record(bases, phreds):
            # SeqRecord carrying the given phred qualities, wrapped for use
            # in a packet.
            record = SeqRecord(Seq(bases))
            record.letter_annotations['phred_quality'] = phreds
            return SeqWrapper(SEQRECORD, record, None)

        def run(wrapped, **options):
            # Fresh TrimByQuality (window fixed at 5) and a fresh one-read
            # packet for every call; the shared TrimOrMask finishes the job.
            quality_filter = TrimByQuality(window=5, **options)
            packet = {SEQS_PASSED: [[wrapped]], ORPHAN_SEQS: []}
            return trimmer(quality_filter(packet))

        def first_read(packet):
            return packet[SEQS_PASSED][0][0]

        edge_quals = [10, 10, 20, 30, 30, 30, 40, 40, 30, 30, 20, 20, 10, 10]
        read = wrap_record('ACTGCTGCATAAAA', edge_quals)
        survivor = first_read(run(read, threshold=30))
        assert get_int_qualities(survivor) == [20, 30, 30, 30, 40, 40, 30, 30,
                                               20]

        # all bad
        assert not run(read, threshold=60)[SEQS_PASSED]

        # all OK
        survivor = first_read(run(read, threshold=5))
        assert get_int_qualities(survivor) == edge_quals

        plateau_quals = [20, 20, 20, 60, 60, 60, 60, 60, 20, 20, 20, 20]
        read = wrap_record('ACTGCTGCATAA', plateau_quals)
        survivor = first_read(run(read, threshold=50))
        assert get_int_qualities(survivor) == [20, 60, 60, 60, 60, 60, 20]

        mixed_quals = [
            40, 18, 10, 40, 40, 5, 8, 30, 14, 3, 40, 40, 40, 11, 6, 5, 3, 20,
            10, 12, 8, 5, 4, 7, 1
        ]
        read = wrap_record('atatatatagatagatagatagatg', mixed_quals)
        survivor = first_read(run(read, threshold=25))
        assert get_int_qualities(survivor) == [40, 18, 10, 40, 40]

        # The remaining SeqRecord cases share one quality profile; each case
        # gets its own copy of the list.
        noisy_bases = 'atatatatatatatatatatatata'
        noisy_quals = [
            40, 40, 13, 11, 40, 9, 40, 4, 27, 38, 40, 4, 11, 40, 40, 10, 10,
            21, 3, 40, 9, 9, 12, 10, 9
        ]

        # Both edges trimmed.
        read = wrap_record(noisy_bases, list(noisy_quals))
        survivor = first_read(run(read, threshold=25))
        assert get_int_qualities(survivor) == [40, 4, 27, 38, 40]

        # Left edge kept.
        read = wrap_record(noisy_bases, list(noisy_quals))
        survivor = first_read(run(read, threshold=25, trim_left=False))
        assert get_int_qualities(survivor) == [
            40, 40, 13, 11, 40, 9, 40, 4, 27, 38, 40
        ]

        # Right edge kept.
        read = wrap_record(noisy_bases, list(noisy_quals))
        survivor = first_read(run(read, threshold=25, trim_right=False))
        assert get_int_qualities(survivor) == [
            40, 4, 27, 38, 40, 4, 11, 40, 40, 10, 10, 21, 3, 40, 9, 9, 12, 10,
            9
        ]

        # Neither edge trimmed: the qualities come back unchanged.
        case_quals = list(noisy_quals)
        read = wrap_record(noisy_bases, case_quals)
        survivor = first_read(run(read, threshold=25, trim_right=False,
                                  trim_left=False))
        assert get_int_qualities(survivor) == case_quals

        # With SeqItems
        fastq_lines = [
            '@s\n', 'atatatatatatatatatatatata\n', '\n',
            'II.,I*I%<GI%,II++6$I**-+*\n'
        ]
        read = SeqWrapper(SEQITEM, SeqItem('s', fastq_lines), 'fastq')
        survivor = first_read(run(read, threshold=25, trim_right=True,
                                  trim_left=False))
        assert survivor.object.lines[3] == 'II.,I*I%<GI\n'
Beispiel #4
0
    def test_edge_trimming(self):
        """TrimEdges followed by TrimOrMask trims or masks the read edges.

        The first table runs TrimOrMask in its default mode and the second
        with mask=True, where the expected strings keep their length and carry
        lowercase letters at the edges. Input packets for the table cases come
        from self._some_seqs().
        """
        def passed_strs(packet):
            # Flatten the groups under SEQS_PASSED into plain strings.
            return [get_str_seq(read)
                    for group in packet[SEQS_PASSED] for read in group]

        def item_packet():
            # One-read packet holding a fresh fasta SeqItem.
            item = SeqItem('s', ['>s\n', 'ACTTTC\n'])
            groups = [[SeqWrapper(SEQITEM, item, 'fasta')]]
            return {SEQS_PASSED: groups, ORPHAN_SEQS: []}

        # Trimming: from left=2/right=2 onwards only one read is expected.
        cutter = TrimOrMask()
        trim_cases = (
            ({'left': 1}, ['CCG', 'AACCCGGG']),
            ({'right': 1}, ['ACC', 'AAACCCGG']),
            ({'left': 1, 'right': 1}, ['CC', 'AACCCGG']),
            ({'left': 2, 'right': 2}, ['ACCCG']),
            ({'left': 3, 'right': 3}, ['CCC']),
        )
        for edge_kwargs, expected in trim_cases:
            edge_marker = TrimEdges(**edge_kwargs)
            packet = cutter(edge_marker(self._some_seqs()))
            assert passed_strs(packet) == expected

        # Masking: one TrimOrMask(mask=True) instance serves every case.
        masker = TrimOrMask(mask=True)
        mask_cases = (
            ({'left': 1}, ['aCCG', 'aAACCCGGG']),
            ({'right': 1}, ['ACCg', 'AAACCCGGg']),
            ({'left': 1, 'right': 1}, ['aCCg', 'aAACCCGGg']),
            ({'left': 2, 'right': 2}, ['accg', 'aaACCCGgg']),
            ({'left': 3, 'right': 3}, ['accg', 'aaaCCCggg']),
        )
        for edge_kwargs, expected in mask_cases:
            edge_marker = TrimEdges(**edge_kwargs)
            packet = masker(edge_marker(self._some_seqs()))
            assert passed_strs(packet) == expected

        # test overlapping mask
        inner_edges = TrimEdges(left=3, right=3)
        outer_edges = TrimEdges(left=4, right=4)
        packet = masker(outer_edges(inner_edges(self._some_seqs())))
        assert passed_strs(packet) == ['accg', 'aaacCcggg']

        # With a SeqItem
        cutter = TrimOrMask(mask=False)
        edge_marker = TrimEdges(left=1, right=1)
        packet = cutter(edge_marker(item_packet()))
        assert passed_strs(packet) == ['CTTT']

        # Same TrimEdges instance, now finished by a masking TrimOrMask.
        masker = TrimOrMask(mask=True)
        packet = masker(edge_marker(item_packet()))
        assert passed_strs(packet) == ['aCTTTc']