def test_strip_seq_by_quality_lucy():
    """Exercise the lucy quality striper followed by the trim-and-mask step.

    Two SeqWithQuality records are pushed through the callable returned by
    create_striper_by_quality_lucy() and then through the one returned by
    create_seq_trim_and_masker(); the first trimmed sequence is checked for
    its expected start and end, and both descriptions must survive.
    Afterwards a striper configured with vector/splice files is run over a
    fastq file of reads with adaptors.
    """
    # NOTE(review): this file contains a later definition with the same
    # name; that one rebinds the name, so only the later body is collected
    # by a test runner.
    seq = (
        "ATCGATCAGTCAGACTGACAGACTCAGATCAGATCAGCATCAGCATACGATACGCATCAGACT"
        "ACGATCGATCGATCGACAGATCATCGATCATCGACGACTAGACGATCATCGATACGCAGACTC"
        "AGCAGACTACGAGATCAGCAGCATCAGCAGCA"
    )
    # Zero-quality stretches at both ends surround a run of quality 60.
    qual = (
        "00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 "
        "00 00 00 00 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        "60 60 60 60 60 60 60 60 60 60 60 60 60 60 00 00 00 00"
    ).split()
    seqrec1 = SeqWithQuality(
        name="seq1", seq=Seq(seq), qual=qual, description="desc1"
    )

    quality = (
        "40 40 40 37 40 40 37 37 37 37 37 37 37 37 40 42 42 42 44 44 "
        "56 56 42 40 40 40 40 36 36 28 35 32 35 35 40 42 37 37 35 37 "
        "32 35 35 35 35 35 35 38 33 33 24 33 33 42 33 35 35 35 35 33 "
        "36 30 30 24 29 29 35 35 35 35 29 29 29 35 38 38 38 37 35 33 "
        "29 35 35 34 30 30 30 33 29 31 31 29 29 29 28 28 24 21 16 16 "
        "21 24 29 29 32 40 27 27 25 25 21 30 27 28 28 32 23 23 21 24 "
        "24 17 18 19 21 15 19 11 9 9 11 23 17 15 10 10 10 20 27 25 23 "
        "18 22 23 24 18 10 10 13 13 18 19 10 12 12 18 16 14 10 10 11 "
        "16 13 21 19 31 19 27 27 28 26 29 25 25 20 19 23 28 28 19 20 "
        "13 9 9 9 9 9 17 15 21 17 14 12 21 17 19 24 28 24 23 "
    ).split()
    seq = (
        "ATCGATCAGTCAGACTGACAGACTCAGATCAGATCAGCATCAGCATACGATACGCATCAGACT"
        "ACGATCGATCGATCGACAGATCATCGATCATCGACGACTAGACGATCATCGATACGCAGACTC"
        "AGCAGACTACGAGATCAGCAGCATCAGCAGCAAGCAGACTACGAGATCAGCAGCATCAGCAGC"
        "ATTACGATGAT"
    )
    seqrec2 = SeqWithQuality(
        seq=Seq(seq), qual=quality, name="seq2", description="desc2"
    )

    seq_trimmer = create_seq_trim_and_masker()
    lucy_striper = create_striper_by_quality_lucy()
    seq_iter = lucy_striper(iter([seqrec1, seqrec2]))
    new_seqs = [seq_trimmer(seq) for seq in seq_iter]

    trimmed = new_seqs[0].seq
    assert seqrec1.description == new_seqs[0].description
    assert trimmed.startswith("CAGATCAGATCAGCATCAGCAT")
    assert trimmed.endswith("CGAGATCAGCAGCATCAGC")
    assert len(new_seqs) == 2
    assert new_seqs[1].description == "desc2"

    # now we test the sequence with adaptors
    vector_fpath = os.path.join(TEST_DATA_DIR, "lucy", "icugi_vector.fasta")
    splice_fpath = os.path.join(TEST_DATA_DIR, "lucy", "icugi_splice.fasta")
    parameters = {
        "vector": [vector_fpath, splice_fpath],
        "bracket": [10, 0.02],
    }
    lucy_striper = create_striper_by_quality_lucy(parameters)
    adaptor_fpath = os.path.join(
        TEST_DATA_DIR, "lucy", "seq_with_adaptor1.fastq"
    )
    # The results are consumed inside the ``with`` so the handle is still
    # open while the reads are pulled, and is closed afterwards even if the
    # pipeline raises.
    with open(adaptor_fpath) as seq_fhand:
        seq_iter = lucy_striper(seqs_in_file(seq_fhand, format="fastq"))
        # No assertions are made on these results; this part only checks
        # that the configured pipeline runs without raising.
        new_seqs = [seq_trimmer(seq) for seq in seq_iter]
def test_reverse_complement():
    """Check reverse_complement() on a SeqWithQuality record.

    The returned record must carry the reverse-complemented sequence and
    the quality values in reversed order.
    """
    bases = Seq("aaaccttt")
    quals = [2, 4, 1, 4, 5, 6, 12, 34]
    # The qualities are supplied directly through the constructor.
    record = SeqWithQuality(name="seq1", seq=bases, qual=quals)

    flipped = reverse_complement(record)

    assert flipped.seq == bases.reverse_complement()
    assert flipped.qual == record.qual[::-1]
def test_add():
    """Check that two Seq objects can be concatenated with ``+``."""
    joined = Seq("AC") + Seq("TG")
    assert joined == "ACTG"

    # Calling complement() on the sum exercises a Seq method on the result
    # of the addition; the assertion passes when the returned value is truthy.
    complemented = joined.complement()
    assert complemented
def test_complement():
    """Check that Seq.complement() returns the complemented bases."""
    complemented = Seq("ACTG").complement()
    assert complemented == "TGAC"
def test_strip_seq_by_quality_lucy():
    """Run the lucy quality striper and the trimmer over test reads.

    First, two in-memory SeqWithQuality records go through the striper built
    by create_striper_by_quality_lucy() and the trimmer built by
    create_seq_trim_and_masker(); the test asserts the start and end of the
    first trimmed sequence, the number of results and their descriptions.
    Then a striper configured with vector and splice fasta files is run over
    a fastq file containing reads with adaptors.
    """
    seq_trimmer = create_seq_trim_and_masker()

    # First record: a run of quality 60 flanked by zero-quality positions.
    bases1 = Seq(
        'ATCGATCAGTCAGACTGACAGACTCAGATCAGATCAGCATCAGCATACGATACGCATCAGACT'
        'ACGATCGATCGATCGACAGATCATCGATCATCGACGACTAGACGATCATCGATACGCAGACTC'
        'AGCAGACTACGAGATCAGCAGCATCAGCAGCA'
    )
    quals1 = (
        '00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 '
        '00 00 00 00 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        '60 60 60 60 60 60 60 60 60 60 60 60 60 60 00 00 00 00'
    ).split()
    seqrec1 = SeqWithQuality(name='seq1', seq=bases1, qual=quals1,
                             description='desc1')

    # Second record: a longer read with mixed quality values.
    bases2 = Seq(
        'ATCGATCAGTCAGACTGACAGACTCAGATCAGATCAGCATCAGCATACGATACGCATCAGACT'
        'ACGATCGATCGATCGACAGATCATCGATCATCGACGACTAGACGATCATCGATACGCAGACTC'
        'AGCAGACTACGAGATCAGCAGCATCAGCAGCAAGCAGACTACGAGATCAGCAGCATCAGCAGC'
        'ATTACGATGAT'
    )
    quals2 = (
        '40 40 40 37 40 40 37 37 37 37 37 37 37 37 40 42 42 42 44 44 '
        '56 56 42 40 40 40 40 36 36 28 35 32 35 35 40 42 37 37 35 37 '
        '32 35 35 35 35 35 35 38 33 33 24 33 33 42 33 35 35 35 35 33 '
        '36 30 30 24 29 29 35 35 35 35 29 29 29 35 38 38 38 37 35 33 '
        '29 35 35 34 30 30 30 33 29 31 31 29 29 29 28 28 24 21 16 16 '
        '21 24 29 29 32 40 27 27 25 25 21 30 27 28 28 32 23 23 21 24 '
        '24 17 18 19 21 15 19 11 9 9 11 23 17 15 10 10 10 20 27 25 23 '
        '18 22 23 24 18 10 10 13 13 18 19 10 12 12 18 16 14 10 10 11 '
        '16 13 21 19 31 19 27 27 28 26 29 25 25 20 19 23 28 28 19 20 '
        '13 9 9 9 9 9 17 15 21 17 14 12 21 17 19 24 28 24 23 '
    ).split()
    seqrec2 = SeqWithQuality(seq=bases2, qual=quals2, name='seq2',
                             description='desc2')

    lucy_striper = create_striper_by_quality_lucy()
    stripped = lucy_striper(iter([seqrec1, seqrec2]))
    new_seqs = [seq_trimmer(record) for record in stripped]

    first_seq = new_seqs[0].seq
    assert seqrec1.description == new_seqs[0].description
    assert first_seq.startswith('CAGATCAGATCAGCATCAGCAT')
    assert first_seq.endswith('CGAGATCAGCAGCATCAGC')
    assert len(new_seqs) == 2
    assert new_seqs[1].description == 'desc2'

    # now we test the sequence with adaptors
    lucy_dir = os.path.join(TEST_DATA_DIR, 'lucy')
    vector_fpath = os.path.join(lucy_dir, 'icugi_vector.fasta')
    splice_fpath = os.path.join(lucy_dir, 'icugi_splice.fasta')
    parameters = {'vector': [vector_fpath, splice_fpath],
                  'bracket': [10, 0.02]}
    lucy_striper = create_striper_by_quality_lucy(parameters)

    # Open the fastq through a context manager so the handle is closed even
    # if the striper or trimmer raises; the iterator is drained while the
    # file is still open.
    with open(os.path.join(lucy_dir, 'seq_with_adaptor1.fastq')) as seq_fhand:
        stripped = lucy_striper(seqs_in_file(seq_fhand, format='fastq'))
        # Nothing is asserted about these results; reaching the end without
        # an exception is the only check made here.
        new_seqs = [seq_trimmer(record) for record in stripped]