def test_blast_short_trimming(self):
    """Check that blast-short trimming flags the adaptor oligos as vector.

    The FASTQ4 reads are processed twice against the same pair of oligos:
    first with everything wrapped as SEQRECORD, then as SEQITEM. In both
    runs the first two reads are expected to carry a VECTOR trimming
    recommendation of (0, 29) and the third read none.
    """

    def vector_recommendations(trim_packet):
        # One entry per passed sequence: its VECTOR recommendations, or an
        # empty list when the sequence carries none.
        found = []
        for seq_group in trim_packet[SEQS_PASSED]:
            for seq in seq_group:
                advice = get_annotations(seq).get(TRIMMING_RECOMMENDATIONS,
                                                  {})
                found.append(advice.get(VECTOR, []))
        return found

    # With SeqRecords
    record_a = SeqRecord(Seq('AAGCAGTGGTATCAACGCAGAGTACATGGG'))
    record_b = SeqRecord(Seq('AAGCAGTGGTATCAACGCAGAGTACTTTTT'))
    adaptors = [
        SeqWrapper(SEQRECORD, record_a, None),
        SeqWrapper(SEQRECORD, record_b, None),
    ]
    blast_trim = TrimWithBlastShort(oligos=adaptors)
    fastq_fhand = StringIO(FASTQ4)
    seq_packets = read_seq_packets([fastq_fhand],
                                   prefered_seq_classes=[SEQRECORD])
    first_packet = list(seq_to_trim_packets(seq_packets))[0]
    trimmed_packet = blast_trim(first_packet)
    # The first and the second reads should be trimmed, the third not.
    assert vector_recommendations(trimmed_packet) == [[(0, 29)], [(0, 29)],
                                                      []]

    # With SeqItems
    item_a = SeqItem('oligo1', ['>oligo1\n',
                                'AAGCAGTGGTATCAACGCAGAGTACATGGG\n'])
    item_b = SeqItem('oligo2', ['>oligo2\n',
                                'AAGCAGTGGTATCAACGCAGAGTACTTTTT\n'])
    adaptors = [
        SeqWrapper(SEQITEM, item_a, 'fasta'),
        SeqWrapper(SEQITEM, item_b, 'fasta'),
    ]
    blast_trim = TrimWithBlastShort(oligos=adaptors)
    fastq_fhand = StringIO(FASTQ4)
    seq_packets = list(read_seq_packets([fastq_fhand],
                                        prefered_seq_classes=[SEQITEM]))
    first_packet = list(seq_to_trim_packets(seq_packets))[0]
    trimmed_packet = blast_trim(first_packet)
    # The first and the second reads should be trimmed, the third not.
    assert vector_recommendations(trimmed_packet) == [[(0, 29)], [(0, 29)],
                                                      []]
def test_blast_short_trimming(self):
    """Verify the VECTOR recommendations produced by TrimWithBlastShort.

    Two adaptor oligos are wrapped first as SEQRECORD and then as SEQITEM;
    each time the FASTQ4 reads are trimmed against them and the resulting
    VECTOR recommendations are compared with the expected
    [[(0, 29)], [(0, 29)], []].
    """
    oligo_seqs = ('AAGCAGTGGTATCAACGCAGAGTACATGGG',
                  'AAGCAGTGGTATCAACGCAGAGTACTTTTT')

    # --- oligos and reads handled as SeqRecords ---
    adaptors = []
    for oligo_seq in oligo_seqs:
        adaptors.append(SeqWrapper(SEQRECORD, SeqRecord(Seq(oligo_seq)),
                                   None))
    blast_trim = TrimWithBlastShort(oligos=adaptors)
    reads_fhand = StringIO(FASTQ4)
    packets = read_seq_packets([reads_fhand],
                               prefered_seq_classes=[SEQRECORD])
    packets_to_trim = list(seq_to_trim_packets(packets))
    trimmed = blast_trim(packets_to_trim[0])

    # Reads one and two should be trimmed; the last read is left alone.
    vector_hits = []
    for paired_seqs in trimmed[SEQS_PASSED]:
        for seq in paired_seqs:
            annots = get_annotations(seq)
            recommended = annots.get(TRIMMING_RECOMMENDATIONS, {})
            vector_hits.append(recommended.get(VECTOR, []))
    assert vector_hits == [[(0, 29)], [(0, 29)], []]

    # --- oligos and reads handled as SeqItems ---
    fasta_oligos = (
        SeqItem('oligo1', ['>oligo1\n', 'AAGCAGTGGTATCAACGCAGAGTACATGGG\n']),
        SeqItem('oligo2', ['>oligo2\n', 'AAGCAGTGGTATCAACGCAGAGTACTTTTT\n']),
    )
    adaptors = []
    for fasta_oligo in fasta_oligos:
        adaptors.append(SeqWrapper(SEQITEM, fasta_oligo, 'fasta'))
    blast_trim = TrimWithBlastShort(oligos=adaptors)
    reads_fhand = StringIO(FASTQ4)
    packets = list(read_seq_packets([reads_fhand],
                                    prefered_seq_classes=[SEQITEM]))
    packets_to_trim = list(seq_to_trim_packets(packets))
    trimmed = blast_trim(packets_to_trim[0])

    # Reads one and two should be trimmed; the last read is left alone.
    vector_hits = []
    for paired_seqs in trimmed[SEQS_PASSED]:
        for seq in paired_seqs:
            annots = get_annotations(seq)
            recommended = annots.get(TRIMMING_RECOMMENDATIONS, {})
            vector_hits.append(recommended.get(VECTOR, []))
    assert vector_hits == [[(0, 29)], [(0, 29)], []]
def test_trim_chimeric_region(self):
    """Check that TrimMatePairChimeras flags the tail of a chimeric read.

    A two-read FASTQ ('seq2 f' and 'seq2 r') is written to a temporary
    file, read back as sequence packets and run through a trimmer built
    from the 'ref_example.fasta' file in TEST_DATA_DIR. The forward read is
    expected to get an OTHER trimming recommendation of (49, 105), which
    starts at offset 49 where its poly-A run begins; the reverse read is
    expected to get none.
    """
    index_fpath = os.path.join(TEST_DATA_DIR, 'ref_example.fasta')
    query1 = '@seq2 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
    query1 += 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n'
    query1 += '+\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'
    query1 += '$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n'
    query2 = '@seq2 r\nCATCATTGCATAAGTAACACTCAACCAACAGTGCTACAGGGTTGTAACG\n'
    query2 += '+\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n'
    query = query1 + query2

    # Both the temporary file and the handle used to re-read it are closed
    # deterministically, even if the trimming or reading raises.
    with NamedTemporaryFile() as fhand:
        fhand.write(query)
        # Flush so that the second handle opened below sees the content.
        fhand.flush()
        trim_chimeras = TrimMatePairChimeras(index_fpath)
        with open(fhand.name) as query_fhand:
            seq_packets = list(read_seq_packets([query_fhand]))
            trim_packets = list(seq_to_trim_packets(seq_packets))
            trim_packets2 = trim_chimeras(trim_packets[0])
            # Collected while the file is still open, in case the
            # sequences keep any reference to it.
            res = [
                get_annotations(s).get(TRIMMING_RECOMMENDATIONS,
                                       {}).get(OTHER, [])
                for seqs in trim_packets2[SEQS_PASSED] for s in seqs
            ]

    # Only the first read should be trimmed; the second one gets no
    # recommendation.
    assert res == [[(49, 105)], []]
def test_trim_chimeric_region(self):
    """Check that TrimMatePairChimeras flags the tail of a chimeric read.

    A two-read FASTQ ('seq2 f' and 'seq2 r') is written to a temporary
    file, read back as sequence packets and run through a trimmer built
    from the 'ref_example.fasta' file in TEST_DATA_DIR. The forward read is
    expected to get an OTHER trimming recommendation of (49, 105), which
    starts at offset 49 where its poly-A run begins; the reverse read is
    expected to get none.
    """
    index_fpath = os.path.join(TEST_DATA_DIR, 'ref_example.fasta')
    query1 = '@seq2 f\nGGGATCGCAGACCCATCTCGTCAGCATGTACCCTTGCTACATTGAACTT'
    query1 += 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n'
    query1 += '+\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$'
    query1 += '$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n'
    query2 = '@seq2 r\nCATCATTGCATAAGTAACACTCAACCAACAGTGCTACAGGGTTGTAACG\n'
    query2 += '+\n$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$\n'
    query = query1 + query2

    # Context managers guarantee that the temporary file and the handle
    # used to re-read it are released even when a step below fails.
    with NamedTemporaryFile() as fhand:
        fhand.write(query)
        # Flush so that the second handle opened below sees the content.
        fhand.flush()
        trim_chimeras = TrimMatePairChimeras(index_fpath)
        with open(fhand.name) as query_fhand:
            seq_packets = list(read_seq_packets([query_fhand]))
            trim_packets = list(seq_to_trim_packets(seq_packets))
            trim_packets2 = trim_chimeras(trim_packets[0])
            # Collected while the file is still open, in case the
            # sequences keep any reference to it.
            res = [
                get_annotations(s).get(TRIMMING_RECOMMENDATIONS,
                                       {}).get(OTHER, [])
                for seqs in trim_packets2[SEQS_PASSED] for s in seqs
            ]

    # Only the first read should be trimmed; the second one gets no
    # recommendation.
    assert res == [[(49, 105)], []]