Example #1
0
    def _do_trim(self, seq):
        """Mask or trim one sequence following its trimming recommendations.

        A sequence without a TRIMMING_RECOMMENDATIONS annotation is returned
        untouched.  Otherwise the annotation is removed from the sequence and
        the segments recommended for every kind in TRIMMING_KINDS are pooled.
        With self.mask set the pooled segments are handed to _mask_sequence;
        otherwise the sequence is sliced to the limits given by
        get_longest_complementary_segment.

        Returns the processed sequence, or None when
        get_longest_complementary_segment returns None (no sequence left).
        """
        annotations = get_annotations(seq)
        if TRIMMING_RECOMMENDATIONS not in annotations:
            return seq

        # The recommendations are consumed here, so they are dropped from
        # the annotations of the sequence.
        recommendations = annotations[TRIMMING_RECOMMENDATIONS]
        del annotations[TRIMMING_RECOMMENDATIONS]

        segments = []
        for kind in TRIMMING_KINDS:
            segments += recommendations.get(kind, [])

        if self.mask:
            return _mask_sequence(seq, segments)

        if not segments:
            # nothing recommended, nothing to cut
            return seq

        limits = get_longest_complementary_segment(segments, get_length(seq))
        if limits is None:
            # there's no sequence left
            return None
        if limits:
            # The end limit is passed as limit + 1; presumably the limits are
            # inclusive and slice_seq takes an exclusive stop (to confirm).
            seq = slice_seq(seq, limits[0], limits[1] + 1)
        return seq
Example #2
0
    def _do_trim(self, seq):
        """Apply the trimming recommendations stored in the seq annotations.

        When the sequence has no TRIMMING_RECOMMENDATIONS annotation it is
        returned as is.  Otherwise that annotation is deleted and the segments
        of all the TRIMMING_KINDS are gathered in a single list, which is
        either masked (self.mask is true) or used to slice the sequence.

        Returns the resulting sequence or None if
        get_longest_complementary_segment returns None.
        """
        seq_annots = get_annotations(seq)
        if TRIMMING_RECOMMENDATIONS not in seq_annots:
            return seq

        # take the recommendations out of the annotations before using them
        recs = seq_annots[TRIMMING_RECOMMENDATIONS]
        del seq_annots[TRIMMING_RECOMMENDATIONS]

        to_trim = [segment
                   for kind in TRIMMING_KINDS
                   for segment in recs.get(kind, [])]

        # masking
        if self.mask:
            return _mask_sequence(seq, to_trim)

        # trimming: without segments there are no limits to compute
        limits = (get_longest_complementary_segment(to_trim, get_length(seq))
                  if to_trim else [])
        if limits is None:
            # there's no sequence left
            return None

        return slice_seq(seq, limits[0], limits[1] + 1) if limits else seq
Example #3
0
    def test_blast_short_trimming(self):
        """TrimWithBlastShort annotates the reads that match the oligos.

        The same check is run twice: with the adaptors given as SeqRecords
        and with the adaptors given as SeqItems.
        """
        expected = [[(0, 29)], [(0, 29)], []]

        def vector_segments(trim_packet):
            'The VECTOR recommendations of every passed read ([] if absent)'
            return [
                get_annotations(read).get(TRIMMING_RECOMMENDATIONS,
                                          {}).get(VECTOR, [])
                for reads in trim_packet[SEQS_PASSED] for read in reads
            ]

        # With SeqRecords
        records = [SeqRecord(Seq('AAGCAGTGGTATCAACGCAGAGTACATGGG')),
                   SeqRecord(Seq('AAGCAGTGGTATCAACGCAGAGTACTTTTT'))]
        adaptors = [SeqWrapper(SEQRECORD, record, None) for record in records]

        trimmer = TrimWithBlastShort(oligos=adaptors)
        packets = read_seq_packets([StringIO(FASTQ4)],
                                   prefered_seq_classes=[SEQRECORD])
        first_packet = list(seq_to_trim_packets(packets))[0]
        # It should trim the first and the second reads.
        assert vector_segments(trimmer(first_packet)) == expected

        # With SeqItems
        items = [
            SeqItem('oligo1',
                    ['>oligo1\n', 'AAGCAGTGGTATCAACGCAGAGTACATGGG\n']),
            SeqItem('oligo2',
                    ['>oligo2\n', 'AAGCAGTGGTATCAACGCAGAGTACTTTTT\n']),
        ]
        adaptors = [SeqWrapper(SEQITEM, item, 'fasta') for item in items]

        trimmer = TrimWithBlastShort(oligos=adaptors)
        packets = list(read_seq_packets([StringIO(FASTQ4)],
                                        prefered_seq_classes=[SEQITEM]))
        first_packet = list(seq_to_trim_packets(packets))[0]
        # It should trim the first and the second reads.
        assert vector_segments(trimmer(first_packet)) == expected
Example #4
0
    def test_blast_short_trimming(self):
        """Check the vector recommendations made by TrimWithBlastShort.

        Two adaptors are used, first wrapped as SeqRecords and then as
        SeqItems; in both runs the first two reads of FASTQ4 are expected to
        get the (0, 29) vector segment and the third one none.
        """
        # With SeqRecords
        record1 = SeqRecord(Seq('AAGCAGTGGTATCAACGCAGAGTACATGGG'))
        record2 = SeqRecord(Seq('AAGCAGTGGTATCAACGCAGAGTACTTTTT'))
        adaptors = [SeqWrapper(SEQRECORD, record1, None),
                    SeqWrapper(SEQRECORD, record2, None)]

        blast_trim = TrimWithBlastShort(oligos=adaptors)
        in_fhand = StringIO(FASTQ4)
        packets = read_seq_packets([in_fhand],
                                   prefered_seq_classes=[SEQRECORD])
        packets_to_trim = list(seq_to_trim_packets(packets))
        annotated_packet = blast_trim(packets_to_trim[0])

        # It should trim the first and the second reads.
        vector_segments = []
        for reads in annotated_packet[SEQS_PASSED]:
            for read in reads:
                recommendations = get_annotations(read).get(
                    TRIMMING_RECOMMENDATIONS, {})
                vector_segments.append(recommendations.get(VECTOR, []))
        assert vector_segments == [[(0, 29)], [(0, 29)], []]

        # With SeqItems
        item1 = SeqItem('oligo1',
                        ['>oligo1\n', 'AAGCAGTGGTATCAACGCAGAGTACATGGG\n'])
        item2 = SeqItem('oligo2',
                        ['>oligo2\n', 'AAGCAGTGGTATCAACGCAGAGTACTTTTT\n'])
        adaptors = [SeqWrapper(SEQITEM, item1, 'fasta'),
                    SeqWrapper(SEQITEM, item2, 'fasta')]

        blast_trim = TrimWithBlastShort(oligos=adaptors)
        in_fhand = StringIO(FASTQ4)
        packets = list(read_seq_packets([in_fhand],
                                        prefered_seq_classes=[SEQITEM]))
        packets_to_trim = list(seq_to_trim_packets(packets))
        annotated_packet = blast_trim(packets_to_trim[0])

        # It should trim the first and the second reads.
        vector_segments = []
        for reads in annotated_packet[SEQS_PASSED]:
            for read in reads:
                recommendations = get_annotations(read).get(
                    TRIMMING_RECOMMENDATIONS, {})
                vector_segments.append(recommendations.get(VECTOR, []))
        assert vector_segments == [[(0, 29)], [(0, 29)], []]
Example #5
0
    def test_trimming(self):
        """TrimOrMask trims a list of sequences as the recommendations say.

        A single sequence is trimmed several times with different vector and
        quality segments and the remaining sequence is checked every time.
        """
        record = SeqRecord(Seq('gggtctcatcatcaggg'.upper()),
                           annotations={TRIMMING_RECOMMENDATIONS: {}})
        seq = SeqWrapper(SEQRECORD, record, None)

        recommendations = get_annotations(seq)[TRIMMING_RECOMMENDATIONS]
        trimmer = TrimOrMask()

        def trim():
            'It sets the recommendations in the seq and runs the trimmer'
            # The recommendations are set again before every run; the last
            # assertion shows that they are missing from the trimmer output.
            get_annotations(seq)[TRIMMING_RECOMMENDATIONS] = recommendations
            return trimmer([seq])

        recommendations['vector'] = [(0, 3), (8, 13)]
        assert get_str_seq(trim()[0]) == 'CTCA'

        recommendations['vector'] = [(0, 0), (8, 13)]
        assert get_str_seq(trim()[0]) == 'GGTCTCA'

        # vector and quality segments together: no sequence is returned
        recommendations['vector'] = [(0, 1), (8, 12)]
        recommendations['quality'] = [(1, 8), (13, 17)]
        assert not trim()

        recommendations['vector'] = [(0, 0), (8, 13)]
        recommendations['quality'] = []
        trimmed = trim()
        assert get_str_seq(trimmed[0]) == 'GGTCTCA'
        assert TRIMMING_RECOMMENDATIONS not in get_annotations(trimmed[0])
Example #6
0
    def __call__(self, seqs):
        """Mask or trim every sequence following its recommendations.

        For each sequence in seqs:

        - without a TRIMMING_RECOMMENDATIONS annotation, a copy made with
          copy_seq is kept;
        - otherwise the annotation is deleted, the segments of all the
          TRIMMING_KINDS are pooled and the sequence is masked (self.mask
          true) or sliced to the limits returned by
          get_longest_complementary_segment.  A sequence for which that
          function returns None is left out of the result.

        Returns the list of processed sequences.
        """
        do_mask = self.mask
        result = []
        for seq in seqs:
            annotations = get_annotations(seq)
            if TRIMMING_RECOMMENDATIONS not in annotations:
                result.append(copy_seq(seq))
                continue

            # the recommendations are taken out of the annotations
            recommendations = annotations[TRIMMING_RECOMMENDATIONS]
            del annotations[TRIMMING_RECOMMENDATIONS]

            segments = []
            for kind in TRIMMING_KINDS:
                segments += recommendations.get(kind, [])

            if do_mask:
                result.append(_mask_sequence(seq, segments))
                continue

            # trimming
            if segments:
                limits = get_longest_complementary_segment(segments,
                                                           get_length(seq))
                if limits is None:
                    # there's no sequence left
                    continue
                if limits:
                    seq = slice_seq(seq, limits[0], limits[1] + 1)

            result.append(seq)

        return result
Example #7
0
    def test_trim_chimeric_region(self):
        """TrimMatePairChimeras adds a recommendation to the chimeric read.

        A pair of reads ('seq2 f' and 'seq2 r') is written to a temporary
        FASTQ file, read back and passed to a trimmer built with the
        ref_example.fasta file found in TEST_DATA_DIR.
        """
        index_fpath = os.path.join(TEST_DATA_DIR, 'ref_example.fasta')
        query1 = '@seq2 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
        query1 += 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n'
        query1 += '+\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'
        query1 += '$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n'
        query2 = '@seq2 r\nCATCATTGCATAAGTAACACTCAACCAACAGTGCTACAGGGTTGTAACG\n'
        query2 += '+\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n'
        query = query1 + query2

        # Both file handles are managed by with blocks, so they are closed
        # (and the temporary file removed) even if the test fails.
        with NamedTemporaryFile() as fhand:
            fhand.write(query)
            fhand.flush()

            trim_chimeras = TrimMatePairChimeras(index_fpath)
            with open(fhand.name) as query_fhand:
                # list() reads all the packets before the handle is closed
                seq_packets = list(read_seq_packets([query_fhand]))
            trim_packets = list(seq_to_trim_packets(seq_packets))
            trim_packets2 = trim_chimeras(trim_packets[0])

        # Only the first read should get a trimming recommendation (of the
        # OTHER kind), the second one should get none.
        res = [get_annotations(s).get(TRIMMING_RECOMMENDATIONS, {}).get(OTHER,
                                                                        [])
                            for l in trim_packets2[SEQS_PASSED] for s in l]
        assert res == [[(49, 105)], []]
Example #8
0
    def test_trim_chimeric_region(self):
        """Check the recommendation TrimMatePairChimeras adds to a mate pair.

        The pair ('seq2 f' and 'seq2 r') is stored in a temporary FASTQ file
        and the trimmer is created with the ref_example.fasta file located in
        TEST_DATA_DIR.
        """
        index_fpath = os.path.join(TEST_DATA_DIR, 'ref_example.fasta')
        query1 = '@seq2 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
        query1 += 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n'
        query1 += '+\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'
        query1 += '$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n'
        query2 = '@seq2 r\nCATCATTGCATAAGTAACACTCAACCAACAGTGCTACAGGGTTGTAACG\n'
        query2 += '+\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n'
        query = query1 + query2

        # The temporary file and the handle used to read it back are opened
        # in with blocks, so neither of them is left open by the test.
        with NamedTemporaryFile() as fhand:
            fhand.write(query)
            fhand.flush()

            trim_chimeras = TrimMatePairChimeras(index_fpath)
            with open(fhand.name) as query_fhand:
                # the packets are fully read while the handle is still open
                seq_packets = list(read_seq_packets([query_fhand]))
            trim_packets = list(seq_to_trim_packets(seq_packets))
            trim_packets2 = trim_chimeras(trim_packets[0])

        # The first read is expected to get one OTHER recommendation and the
        # second read none.
        res = [
            get_annotations(s).get(TRIMMING_RECOMMENDATIONS,
                                   {}).get(OTHER, [])
            for l in trim_packets2[SEQS_PASSED] for s in l
        ]
        assert res == [[(49, 105)], []]
Example #9
0
    def test_trimming(self):
        """TrimOrMask trims the reads of a trim packet as recommended.

        The same one-read packet is trimmed several times with different
        vector and quality segments and the passed sequences are checked
        after every run.
        """
        record = SeqRecord(Seq('gggtctcatcatcaggg'.upper()),
                           annotations={TRIMMING_RECOMMENDATIONS: {}})
        seq = SeqWrapper(SEQRECORD, record, None)
        trim_packet = {SEQS_PASSED: [[seq]], ORPHAN_SEQS: []}

        recommendations = get_annotations(seq)[TRIMMING_RECOMMENDATIONS]
        trimmer = TrimOrMask()

        def run_trimmer():
            'It sets the recommendations in the seq and trims the packet'
            # The recommendations are set again before every run; the last
            # assertion shows that they are missing from the trimmer output.
            get_annotations(seq)[TRIMMING_RECOMMENDATIONS] = recommendations
            return trimmer(trim_packet)

        def passed_strs(packet):
            'The str sequences of all the reads that passed'
            return [get_str_seq(read)
                    for reads in packet[SEQS_PASSED] for read in reads]

        recommendations['vector'] = [(0, 3), (8, 13)]
        assert passed_strs(run_trimmer()) == ['CTCA']

        recommendations['vector'] = [(0, 0), (8, 13)]
        assert passed_strs(run_trimmer()) == ['GGTCTCA']

        # vector and quality segments together: no read passes
        recommendations['vector'] = [(0, 1), (8, 12)]
        recommendations['quality'] = [(1, 8), (13, 17)]
        assert not run_trimmer()[SEQS_PASSED]

        recommendations['vector'] = [(0, 0), (8, 13)]
        recommendations['quality'] = []
        trimmed_packet = run_trimmer()
        assert passed_strs(trimmed_packet) == ['GGTCTCA']
        first_read = trimmed_packet[SEQS_PASSED][0][0]
        assert TRIMMING_RECOMMENDATIONS not in get_annotations(first_read)
Example #10
0
    def test_trimming(self):
        """Check the sequences left by TrimOrMask in a trim packet.

        One read, wrapped in a packet, goes through the trimmer with several
        combinations of vector and quality segments.
        """
        raw_seq = 'gggtctcatcatcaggg'.upper()
        wrapped = SeqWrapper(
            SEQRECORD,
            SeqRecord(Seq(raw_seq),
                      annotations={TRIMMING_RECOMMENDATIONS: {}}),
            None)
        packet = {SEQS_PASSED: [[wrapped]], ORPHAN_SEQS: []}

        recs = get_annotations(wrapped)[TRIMMING_RECOMMENDATIONS]
        trim_or_mask = TrimOrMask()

        def trim_packet_again():
            'It returns the trimmed packet and the str seqs that passed'
            # the recommendations have to be in the read for every run
            get_annotations(wrapped)[TRIMMING_RECOMMENDATIONS] = recs
            out_packet = trim_or_mask(packet)
            str_seqs = []
            for reads in out_packet[SEQS_PASSED]:
                for read in reads:
                    str_seqs.append(get_str_seq(read))
            return out_packet, str_seqs

        recs['vector'] = [(0, 3), (8, 13)]
        _, str_seqs = trim_packet_again()
        assert str_seqs == ['CTCA']

        recs['vector'] = [(0, 0), (8, 13)]
        _, str_seqs = trim_packet_again()
        assert str_seqs == ['GGTCTCA']

        recs['vector'] = [(0, 1), (8, 12)]
        recs['quality'] = [(1, 8), (13, 17)]
        get_annotations(wrapped)[TRIMMING_RECOMMENDATIONS] = recs
        out_packet = trim_or_mask(packet)
        # nothing is expected to pass with these segments
        assert not out_packet[SEQS_PASSED]

        recs['vector'] = [(0, 0), (8, 13)]
        recs['quality'] = []
        out_packet, str_seqs = trim_packet_again()
        assert str_seqs == ['GGTCTCA']
        surviving_read = out_packet[SEQS_PASSED][0][0]
        assert TRIMMING_RECOMMENDATIONS not in get_annotations(surviving_read)