Esempio n. 1
0
    def test_strip_seq_by_quality_lucy():
        """It tests strip_seq_by_quality_lucy2.

        Two SeqWithQuality records built in memory are passed through the
        striper returned by create_striper_by_quality_lucy() and then through
        the callable returned by create_seq_trim_and_masker(). The first
        result is checked against the expected start and end of its sequence.

        The striper is then rebuilt with vector/splice files and run on a
        fastq file. That second pass carries no assertions, so it only
        verifies that the pipeline runs without raising.
        """
        # First record: zero quality at both ends, quality 60 in the middle.
        seq = "ATCGATCAGTCAGACTGACAGACTCAGATCAGATCAGCATCAGCATACGATACGCATCAGACT"
        seq += "ACGATCGATCGATCGACAGATCATCGATCATCGACGACTAGACGATCATCGATACGCAGACTC"
        seq += "AGCAGACTACGAGATCAGCAGCATCAGCAGCA"
        qual = "00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 "
        qual += "00 00 00 00 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        qual += "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        qual += "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        qual += "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        qual += "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        qual += "60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 "
        qual += "60 60 60 60 60 60 60 60 60 60 60 60 60 60 00 00 00 00"
        seqrec1 = SeqWithQuality(
            name="seq1", seq=Seq(seq), qual=qual.split(), description="desc1"
        )

        # Second record: a longer sequence with varied quality values.
        qual = "40 40 40 37 40 40 37 37 37 37 37 37 37 37 40 42 42 42 44 44 "
        qual += "56 56 42 40 40 40 40 36 36 28 35 32 35 35 40 42 37 37 35 37 "
        qual += "32 35 35 35 35 35 35 38 33 33 24 33 33 42 33 35 35 35 35 33 "
        qual += "36 30 30 24 29 29 35 35 35 35 29 29 29 35 38 38 38 37 35 33 "
        qual += "29 35 35 34 30 30 30 33 29 31 31 29 29 29 28 28 24 21 16 16 "
        qual += "21 24 29 29 32 40 27 27 25 25 21 30 27 28 28 32 23 23 21 24 "
        qual += "24 17 18 19 21 15 19 11 9 9 11 23 17 15 10 10 10 20 27 25 23 "
        qual += "18 22 23 24 18 10 10 13 13 18 19 10 12 12 18 16 14 10 10 11 "
        qual += "16 13 21 19 31 19 27 27 28 26 29 25 25 20 19 23 28 28 19 20 "
        qual += "13 9 9 9 9 9 17 15 21 17 14 12 21 17 19 24 28 24 23 "
        seq = "ATCGATCAGTCAGACTGACAGACTCAGATCAGATCAGCATCAGCATACGATACGCATCAGACT"
        seq += "ACGATCGATCGATCGACAGATCATCGATCATCGACGACTAGACGATCATCGATACGCAGACTC"
        seq += "AGCAGACTACGAGATCAGCAGCATCAGCAGCAAGCAGACTACGAGATCAGCAGCATCAGCAGC"
        seq += "ATTACGATGAT"
        seqrec2 = SeqWithQuality(
            seq=Seq(seq), qual=qual.split(), name="seq2", description="desc2"
        )

        seq_trimmer = create_seq_trim_and_masker()
        lucy_striper = create_striper_by_quality_lucy()
        # pylint:disable-msg=W0612
        seq_iter = lucy_striper(iter([seqrec1, seqrec2]))
        new_seqs = [seq_trimmer(seq) for seq in seq_iter]

        first_seq = new_seqs[0].seq
        assert seqrec1.description == new_seqs[0].description
        assert first_seq.startswith("CAGATCAGATCAGCATCAGCAT")
        assert first_seq.endswith("CGAGATCAGCAGCATCAGC")
        assert len(new_seqs) == 2
        assert new_seqs[1].description == "desc2"

        # now we test the sequence with adaptors
        vector_fpath = os.path.join(TEST_DATA_DIR, "lucy", "icugi_vector.fasta")
        splice_fpath = os.path.join(TEST_DATA_DIR, "lucy", "icugi_splice.fasta")
        parameters = {"vector": [vector_fpath, splice_fpath], "bracket": [10, 0.02]}
        lucy_striper = create_striper_by_quality_lucy(parameters)
        fastq_fpath = os.path.join(TEST_DATA_DIR, "lucy", "seq_with_adaptor1.fastq")
        # The striper output may be produced lazily, so it is fully consumed
        # while the file is still open; the handle is closed on exit, even if
        # the pipeline raises.
        with open(fastq_fpath) as seq_fhand:
            seq_iter = lucy_striper(seqs_in_file(seq_fhand, format="fastq"))
            new_seqs = [seq_trimmer(seq) for seq in seq_iter]
    def test_reverse_complement():
        """Check reverse_complement on a SeqWithQuality.

        The result must hold the reverse-complemented sequence and the
        quality values in reversed order.
        """
        bases = Seq('aaaccttt')
        quals = [2, 4, 1, 4, 5, 6, 12, 34]

        # The qualities are handed straight to the constructor.
        original = SeqWithQuality(name='seq1', seq=bases, qual=quals)
        flipped = reverse_complement(original)

        assert flipped.seq == bases.reverse_complement()
        assert flipped.qual == original.qual[::-1]
 def test_add():
     """Check that adding two Seq objects joins their sequences."""
     joined = Seq('AC') + Seq('TG')
     assert joined == 'ACTG'
     # complement() is only callable if the sum is still a Seq
     assert joined.complement()
 def test_complement():
     """Check that Seq.complement() returns the complementary bases."""
     complemented = Seq('ACTG').complement()
     assert complemented == 'TGAC'
Esempio n. 5
0
    def test_strip_seq_by_quality_lucy():
        '''It tests strip_seq_by_quality_lucy2.

        Two SeqWithQuality records built in memory are passed through the
        striper returned by create_striper_by_quality_lucy() and then through
        the callable returned by create_seq_trim_and_masker(). The first
        result is checked against the expected start and end of its sequence.

        The striper is then rebuilt with vector/splice files and run on a
        fastq file. That second pass carries no assertions, so it only
        verifies that the pipeline runs without raising.
        '''
        # First record: zero quality at both ends, quality 60 in the middle.
        seq =  'ATCGATCAGTCAGACTGACAGACTCAGATCAGATCAGCATCAGCATACGATACGCATCAGACT'
        seq += 'ACGATCGATCGATCGACAGATCATCGATCATCGACGACTAGACGATCATCGATACGCAGACTC'
        seq += 'AGCAGACTACGAGATCAGCAGCATCAGCAGCA'
        qual =  '00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 '
        qual += '00 00 00 00 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        qual += '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        qual += '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        qual += '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        qual += '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        qual += '60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 60 '
        qual += '60 60 60 60 60 60 60 60 60 60 60 60 60 60 00 00 00 00'
        seqrec1 = SeqWithQuality(name='seq1', seq=Seq(seq), qual=qual.split(),
                                 description='desc1')

        # Second record: a longer sequence with varied quality values.
        qual  = '40 40 40 37 40 40 37 37 37 37 37 37 37 37 40 42 42 42 44 44 '
        qual += '56 56 42 40 40 40 40 36 36 28 35 32 35 35 40 42 37 37 35 37 '
        qual += '32 35 35 35 35 35 35 38 33 33 24 33 33 42 33 35 35 35 35 33 '
        qual += '36 30 30 24 29 29 35 35 35 35 29 29 29 35 38 38 38 37 35 33 '
        qual += '29 35 35 34 30 30 30 33 29 31 31 29 29 29 28 28 24 21 16 16 '
        qual += '21 24 29 29 32 40 27 27 25 25 21 30 27 28 28 32 23 23 21 24 '
        qual += '24 17 18 19 21 15 19 11 9 9 11 23 17 15 10 10 10 20 27 25 23 '
        qual += '18 22 23 24 18 10 10 13 13 18 19 10 12 12 18 16 14 10 10 11 '
        qual += '16 13 21 19 31 19 27 27 28 26 29 25 25 20 19 23 28 28 19 20 '
        qual += '13 9 9 9 9 9 17 15 21 17 14 12 21 17 19 24 28 24 23 '
        seq =  'ATCGATCAGTCAGACTGACAGACTCAGATCAGATCAGCATCAGCATACGATACGCATCAGACT'
        seq += 'ACGATCGATCGATCGACAGATCATCGATCATCGACGACTAGACGATCATCGATACGCAGACTC'
        seq += 'AGCAGACTACGAGATCAGCAGCATCAGCAGCAAGCAGACTACGAGATCAGCAGCATCAGCAGC'
        seq += 'ATTACGATGAT'
        seqrec2 = SeqWithQuality(seq=Seq(seq), qual=qual.split(), name='seq2',
                                 description='desc2')

        seq_trimmer = create_seq_trim_and_masker()
        lucy_striper = create_striper_by_quality_lucy()
        #pylint:disable-msg=W0612
        seq_iter = lucy_striper(iter([seqrec1, seqrec2]))
        new_seqs = [seq_trimmer(seq) for seq in seq_iter]

        first_seq = new_seqs[0].seq
        assert seqrec1.description == new_seqs[0].description
        assert first_seq.startswith('CAGATCAGATCAGCATCAGCAT')
        assert first_seq.endswith('CGAGATCAGCAGCATCAGC')
        assert len(new_seqs) == 2
        assert new_seqs[1].description == 'desc2'

        # now we test the sequence with adaptors
        vector_fpath = os.path.join(TEST_DATA_DIR, 'lucy', 'icugi_vector.fasta')
        splice_fpath = os.path.join(TEST_DATA_DIR, 'lucy', 'icugi_splice.fasta')
        parameters = {'vector':[vector_fpath, splice_fpath],
                      'bracket':[10, 0.02]}
        lucy_striper = create_striper_by_quality_lucy(parameters)
        fastq_fpath = os.path.join(TEST_DATA_DIR, 'lucy',
                                   'seq_with_adaptor1.fastq')
        # The striper output may be produced lazily, so it is fully consumed
        # while the file is still open; the handle is closed on exit, even if
        # the pipeline raises.
        with open(fastq_fpath) as seq_fhand:
            seq_iter = lucy_striper(seqs_in_file(seq_fhand, format='fastq'))
            new_seqs = [seq_trimmer(seq) for seq in seq_iter]