コード例 #1
0
 def test_statistics(self):
     """Check the counters returned by Sequence.statistics().

     The expectations show that lower-case bases are folded into the
     upper-case keys and that the leading index and blanks are not counted
     in 'L'.
     """
     cases = (
         ('   1 ACTG',
          dict(A=1, C=1, T=1, G=1, N=0, L=4)),
         ('   1 ACTG NNNNNNNN',
          dict(A=1, C=1, T=1, G=1, N=8, L=12)),
         ('   1 ACTG NNNNNNNN\naaanan',
          dict(A=5, C=1, T=1, G=1, N=10, L=18)),
     )
     for raw_seq, expected in cases:
         self.assertEqual(expected, Sequence('>name', raw_seq).statistics())
コード例 #2
0
ファイル: test_sequence.py プロジェクト: BiobankLab/FA_TOOL
 def test_generic_validation(self):
     """Check Sequence.generic_validate on valid and malformed input.

     Every case passes the same "illegal character" pattern and expects 1
     for an accepted sequence and 0 for a rejected one.  The multi-line
     inputs start each line with an index; presumably that index must agree
     with the number of bases on the preceding lines (TODO confirm against
     generic_validate).
     """
     # '\\-' yields the same backslash + hyphen the old '\-' produced, but
     # without the invalid-escape warning; '\n' remains a real newline.
     pattern = '[^ACGNTUBDHKMRSVWY\\-\nacgntubdhkmrsvwy]'
     cases = (
         ('ACTGactg', 1),
         ('ACTGactgee', 0),
         ('   1 ACTGactgAC aaatttccca ACTGactgaa aaatttccca\n   41 ACTGactgAA aaatttccca ACTGactgtt aaatttccca\n   81 ACTGactgGG aaatttccca ACTGactgcc aaatt', 1),
         # not allowed symbol e
         ('   1 ACTGactgAC aaatttccca ACTGactgee aaatttccca\n   41 ACTGactgAA aaatttccca ACTGactgtt aaatttccca\n   81 ACTGactgGG aaatttccca ACTGactgcc aaatt', 0),
         # not allowed symbol e + len <
         ('   1 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   51 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   81 ACTGactgee aaatttccca ACTGactgee aaatt', 0),
         # not allowed symbol e + len >
         ('   1 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   31 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   81 ACTGactgee aaatttccca ACTGactgee aaatt', 0),
         # len >
         ('   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   31 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaatt', 0),
         # len <
         ('   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   51 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaatt', 0),
         # last len >
         ('   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   51 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaattaaattaaatt', 0),
         # last bad symbol
         ('   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   41 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaatte', 0),
     )
     for seq, expected in cases:
         self.assertEqual(Sequence.generic_validate(seq, pattern), expected)
コード例 #3
0
 def test_detailed_validate_generic(self):
     """Check the error messages from Sequence.detailed_validate_generic.

     Clean input must give an empty list; malformed input must give the
     exact ordered list of 'line N: ...' messages asserted below.
     """
     # '\\-' yields the same backslash + hyphen the old '\-' produced, but
     # without the invalid-escape warning; '\n' remains a real newline.
     pattern = '[^ACGNTUBDHKMRSVWY\\-\nacgntubdhkmrsvwy]'

     def report(seq):
         return Sequence.detailed_validate_generic(seq, pattern)

     # Well-formed input: no messages expected.
     self.assertEqual(report('ACTGactg'), [])
     self.assertEqual(
         report('   1 ACTGactgAC aaatttccca ACTGactgaa aaatttccca\n   41 ACTGactgAA aaatttccca ACTGactgtt aaatttccca\n   81 ACTGactgGG aaatttccca ACTGactgcc aaatt'),
         [])

     # A single block with the illegal symbol 'e'.
     self.assertEqual(
         report('   1 ACTGactgAC aaatttccca ACTGactgee aaatttccca\n   41 ACTGactgAA aaatttccca ACTGactgtt aaatttccca\n   81 ACTGactgGG aaatttccca ACTGactgcc aaatt'),
         ['line 1: block 3 contains illegal chars'])

     # Illegal symbols on every line plus a wrong index (51) on line 2.
     expected = [
         'line 1: bad line length',
         'line 1: block 1 contains illegal chars',
         'line 1: block 3 contains illegal chars',
         'line 2: bad line length',
         'line 2: block 1 contains illegal chars',
         'line 2: block 3 contains illegal chars',
         'line 3: block 1 contains illegal chars',
         'line 3: block 3 contains illegal chars',
     ]
     self.assertEqual(
         report('   1 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   51 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   81 ACTGactgee aaatttccca ACTGactgee aaatt'),
         expected)
コード例 #4
0
    def test_validate_seq(self):
        """Check Sequence.validate_seq() on valid and malformed sequences.

        Each case builds a fresh Sequence named '>name' and expects 1 when
        the sequence is accepted and 0 when it is rejected.
        """
        cases = (
            ('ACTGactg', 1),
            ('ACTGactgee', 0),
            ('   1 ACTGactgAC aaatttccca ACTGactgaa aaatttccca\n   41 ACTGactgAA aaatttccca ACTGactgtt aaatttccca\n   81 ACTGactgGG aaatttccca ACTGactgcc aaatt', 1),
            # not allowed symbol e
            ('   1 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   41 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   81 ACTGactgee aaatttccca ACTGactgee aaatt', 0),
            # not allowed symbol e + len <
            ('   1 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   51 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   81 ACTGactgee aaatttccca ACTGactgee aaatt', 0),
            # not allowed symbol e + len >
            ('   1 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   31 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   81 ACTGactgee aaatttccca ACTGactgee aaatt', 0),
            # len >
            ('   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   31 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaatt', 0),
            # len <
            ('   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   51 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaatt', 0),
            # last len >
            ('   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   51 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaattaaattaaatt', 0),
            # last bad symbol
            ('   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   41 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaatte', 0),
        )
        for raw_seq, expected in cases:
            candidate = Sequence('>name', raw_seq)
            self.assertEqual(candidate.validate_seq(), expected)
コード例 #5
0
 def test_cut(self):
     """Check that cut(1, 1) on a normalized 'ACTG' yields one-base fragments.

     Four expected fragments are built, but only the first two are compared
     with the result, exactly as before.
     """
     source = Sequence('>name', '   1 ACTG')
     source.normalize()
     expected = [
         Sequence(frag_name, base)
         for frag_name, base in (
             ('>name_frag_1:1', 'A'),
             ('>name_frag_2:2', 'C'),
             ('>name_frag_3:3', 'T'),
             ('>name_frag_4:4', 'G'),
         )
     ]
     fragments = source.cut(1, 1)
     for idx in (0, 1):
         self.assertEqual(str(expected[idx]), str(fragments[idx]))
コード例 #6
0
    def test_generic_validation(self):
        """Check Sequence.generic_validate on valid and malformed input.

        The same "illegal character" pattern is used for every case; the
        call is expected to return 1 for accepted input and 0 for rejected
        input.  Multi-line inputs carry a leading index per line, which
        presumably has to match the bases seen so far (TODO confirm against
        generic_validate).
        """
        # '\\-' yields the same backslash + hyphen the old '\-' produced,
        # but without the invalid-escape warning; '\n' stays a real newline.
        pattern = '[^ACGNTUBDHKMRSVWY\\-\nacgntubdhkmrsvwy]'

        def verdict(seq):
            return Sequence.generic_validate(seq, pattern)

        self.assertEqual(verdict('ACTGactg'), 1)
        self.assertEqual(verdict('ACTGactgee'), 0)
        self.assertEqual(
            verdict('   1 ACTGactgAC aaatttccca ACTGactgaa aaatttccca\n   41 ACTGactgAA aaatttccca ACTGactgtt aaatttccca\n   81 ACTGactgGG aaatttccca ACTGactgcc aaatt'),
            1)

        rejected = (
            # not allowed symbol e
            '   1 ACTGactgAC aaatttccca ACTGactgee aaatttccca\n   41 ACTGactgAA aaatttccca ACTGactgtt aaatttccca\n   81 ACTGactgGG aaatttccca ACTGactgcc aaatt',
            # not allowed symbol e + len <
            '   1 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   51 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   81 ACTGactgee aaatttccca ACTGactgee aaatt',
            # not allowed symbol e + len >
            '   1 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   31 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   81 ACTGactgee aaatttccca ACTGactgee aaatt',
            # len >
            '   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   31 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaatt',
            # len <
            '   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   51 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaatt',
            # last len >
            '   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   51 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaattaaattaaatt',
            # last bad symbol
            '   1 ACTGactgTT aaatttcc ACTGactgAa aaatttccca\n   41 ACTGactgcT aaatttccca ACTGactgCT aaatttccca\n   81 ACTGactgAg aaatttccca ACTGactggg aaatte',
        )
        for seq in rejected:
            self.assertEqual(verdict(seq), 0)
コード例 #7
0
 def test_leave_name_after_marker(self):
     """Check how leave_name_after_marker() rewrites a sequence name.

     Each case starts from a fresh Sequence, expects the call to return 1,
     and then checks the resulting name.  The second and third positional
     arguments are passed through as in the original calls; their meaning
     (presumably a length limit and a keep-marker flag) should be confirmed
     against the method itself.
     """
     original_name = '>test_something_special_gene=qwerty_ready'
     cases = (
         (('gene=', 20), '>gene=qwerty_ready'),
         (('gene=', 11), '>gene=qwerty'),
         (('gene=',), '>gene=qwerty_ready'),
         (('gene=', 6, 0), '>qwerty'),
     )
     for call_args, expected_name in cases:
         record = Sequence(original_name, 'ACTGTACGGA')
         self.assertEqual(1, record.leave_name_after_marker(*call_args))
         self.assertEqual(expected_name, record.name)
コード例 #8
0
 def test_find_aprox_motif(self):
     """Check find_aprox_motif() with exact and one-off motifs.

     The second argument is presumably the number of tolerated mismatches:
     'TAGAATCGGCT' differs from the text in one position and is found with
     1 but not with 0.
     """
     dna = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGC'
     subject = Sequence('>test', dna)
     cases = (
         ('TGGAATCGGCT', 1, ['TGGAATCGGCT']),
         ('TAGAATCGGCT', 1, ['TGGAATCGGCT']),
         ('TAGAATCGGCT', 0, []),
         ('TGGAATCGGCT', 0, ['TGGAATCGGCT']),
     )
     for motif, tolerance, expected in cases:
         self.assertEqual(expected, subject.find_aprox_motif(motif, tolerance))
コード例 #9
0
    def test_find_primers(self):
        """Check find_primers() with the 'FF' and lower-case 'ff' modes."""
        dna = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGC'
        subject = Sequence('>test', dna)
        cases = (
            (('GGAA', 'GGGG', 'FF'),
             ['GGAATCGGCTTTTAATACTGCAGGGG']),
            (('AA', 'CT', 'ff'),
             ['AATCGGCT', 'AATACT', 'AAGTTGGCATGAACT', 'AACT', 'AACGCCCCT']),
        )
        for call_args, expected in cases:
            self.assertEqual(expected, subject.find_primers(*call_args))
コード例 #10
0
ファイル: test_sequence.py プロジェクト: BiobankLab/FA_TOOL
 def test_detailed_validate_generic(self):
     """Check the messages returned by Sequence.detailed_validate_generic.

     Valid input is expected to produce an empty list; invalid input is
     expected to produce the exact ordered 'line N: ...' messages below.
     """
     # '\\-' yields the same backslash + hyphen the old '\-' produced, but
     # without the invalid-escape warning; '\n' remains a real newline.
     pattern = '[^ACGNTUBDHKMRSVWY\\-\nacgntubdhkmrsvwy]'
     all_bad = [
         'line 1: bad line length',
         'line 1: block 1 contains illegal chars',
         'line 1: block 3 contains illegal chars',
         'line 2: bad line length',
         'line 2: block 1 contains illegal chars',
         'line 2: block 3 contains illegal chars',
         'line 3: block 1 contains illegal chars',
         'line 3: block 3 contains illegal chars',
     ]
     cases = (
         ('ACTGactg', []),
         ('   1 ACTGactgAC aaatttccca ACTGactgaa aaatttccca\n   41 ACTGactgAA aaatttccca ACTGactgtt aaatttccca\n   81 ACTGactgGG aaatttccca ACTGactgcc aaatt', []),
         ('   1 ACTGactgAC aaatttccca ACTGactgee aaatttccca\n   41 ACTGactgAA aaatttccca ACTGactgtt aaatttccca\n   81 ACTGactgGG aaatttccca ACTGactgcc aaatt', ['line 1: block 3 contains illegal chars']),
         ('   1 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   51 ACTGactgee aaatttccca ACTGactgee aaatttccca\n   81 ACTGactgee aaatttccca ACTGactgee aaatt', all_bad),
     )
     for seq, expected in cases:
         self.assertEqual(Sequence.detailed_validate_generic(seq, pattern), expected)
コード例 #11
0
 def convert_to_fq(self, quality):
     """Build a new Fa whose records carry a constant quality string.

     For every item of self.contigs a Sequence is created with a synthetic
     '@EAS123:...' header numbered from 1, the item's seq, and a quality
     string made of chr(33 + quality) repeated len(item) times (presumably
     a Phred+33 encoding - confirm with the FASTQ consumer).

     quality -- integer offset added to 33 to pick the quality character.
     Returns an Fa named self.name + '_fq'.
     """
     records = []
     for idx, contig in enumerate(self.contigs, 1):
         quality_line = chr(33 + quality) * len(contig)
         # The three numeric header fields are idx, idx*7 and idx*8.
         numbers = ':'.join(str(idx * factor) for factor in (1, 7, 8))
         header = '@EAS123:100:FC123VJ:2:' + numbers + ' 1:N:18:1'
         records.append(Sequence(header, contig.seq, quality_line))
     return Fa(records, self.name + '_fq')
コード例 #12
0
    def test_find_primers(self):
        """Check find_primers() for TTT/GGG in 'FF' mode and TTT/CCC in 'FR'.

        Both calls are expected to return the same ordered list of
        fragments, so the expectation is written once and reused.
        """
        dna = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA'
        subject = Sequence('>test', dna)

        expected_fragments = [
            'TTTTAATACTGCAGGG',
            'TTTAATACTGCAGGG',
            'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG',
            'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG',
            'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG',
            'TTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG',
            'TTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGG',
            'TTTGGCCCTGAAGACATTCCAGAGGTGGG',
            'TTTTTGTTGGATAAAAGACCCACAGGG',
            'TTTTGTTGGATAAAAGACCCACAGGG',
            'TTTGTTGGATAAAAGACCCACAGGG',
            'TTTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG',
            'TTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG',
            'TTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG',
            'TTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG',
            'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG',
            'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG',
            'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG',
            'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG',
            'TTTTCAATAATGTGATAGTGACCGTGGG',
            'TTTCAATAATGTGATAGTGACCGTGGG',
        ]

        for stop_primer, mode in (('GGG', 'FF'), ('CCC', 'FR')):
            self.assertEqual(
                list(expected_fragments),
                subject.find_primers('TTT', stop_primer, mode, 0, 10000))
コード例 #13
0
ファイル: test_sequence.py プロジェクト: BiobankLab/FA_TOOL
 def translate2protein_generic(self):
     """Check Sequence.translate2protein_generic on all three frames.

     The expected result is a list of three tuples (leading bases,
     translated protein, trailing bases), one per reading-frame offset.

     NOTE(review): the method name has no 'test_' prefix, so default
     unittest discovery presumably never runs it - confirm whether that is
     intended.
     """
     # Codon table written per residue; '*' marks the three stop codons.
     codons_by_residue = (
         ('A', 'GCA GCC GCG GCT'),
         ('C', 'TGC TGT'),
         ('D', 'GAC GAT'),
         ('E', 'GAA GAG'),
         ('F', 'TTC TTT'),
         ('G', 'GGA GGC GGG GGT'),
         ('H', 'CAC CAT'),
         ('I', 'ATA ATC ATT'),
         ('K', 'AAA AAG'),
         ('L', 'TTA TTG CTA CTC CTG CTT'),
         ('M', 'ATG'),
         ('N', 'AAC AAT'),
         ('P', 'CCA CCC CCG CCT'),
         ('Q', 'CAA CAG'),
         ('R', 'AGA AGG CGA CGC CGG CGT'),
         ('S', 'AGC AGT TCA TCC TCG TCT'),
         ('T', 'ACA ACC ACG ACT'),
         ('V', 'GTA GTC GTG GTT'),
         ('W', 'TGG'),
         ('Y', 'TAC TAT'),
         ('*', 'TAG TGA TAA'),
     )
     tdict = {}
     for residue, codons in codons_by_residue:
         for codon in codons.split():
             tdict[codon] = residue

     dna = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA'

     expected_frames = [
         ('', 'MESAFNTAGALSWHELTTNNTEEAMRFYAEIFGWHFKTVKMPHGHYHIIENEGISIGGITDSLIPTLPSHWTGYITVNDVDQVAISAKKLGGDILFGPEDIPEVGRFCWIKDPQGAIIAAISYLKR*C', 'AA'),
         ('A', 'WNRLLILQGR*VGMNSQPIIPKRPCASMLRFLAGTLKPSKCPTVTITLLKTRGSALAELPTV*SPPFPHIGLAILPLTMWIKWLSVLKNSAVTFCLALKTFQRWAVFVG*KTHRAPLLRPLAI*NVDV', 'A'),
         ('AT', 'GIGF*YCRGVKLA*THNQ*YRRGHALLC*DFWLAL*NRQNAPRSLSHY*KRGDQHWRNYRQFNPHPSLTLDWLYYR*RCGSSGYQC*KTRR*HSVWP*RHSRGGPFLLDKRPTGRHYCGH*LFKTLM*', ''),
     ]

     self.assertEqual(expected_frames, Sequence.translate2protein_generic(dna, tdict))
コード例 #14
0
 def test_cmp(self):
     """Two sequences with different names but equal text have equal seq."""
     first = Sequence('>name', '   1 ACTG')
     second = Sequence('>name2', '   1 ACTG')
     same_text = first.seq == second.seq
     self.assertTrue(same_text)
コード例 #15
0
 def test_cut_name(self):
     """Check cut_name(): cutting to 5 gives '>test', and a later cut to 10
     on the already shortened name leaves it at '>test'."""
     record = Sequence('>test_something_special_gene=qwerty_ready', 'ACTGTACGGA')
     for limit in (5, 10):
         record.cut_name(limit)
         self.assertEqual('>test', record.name)
コード例 #16
0
    def test_find_aprox_primers(self):
        """Check find_aprox_primers() across modes, tolerance and bounds.

        Covered here: exact search (4th argument 0) in 'FF' and 'FR' modes,
        search with the 4th argument set to 1, case-insensitivity of the
        mode string, and the effect of the last two arguments (0/10000,
        60/10000, 60/65), which presumably bound the fragment length -
        confirm against the method.
        """
        dna = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA'
        subject = Sequence('>test', dna)

        # Exact primers: the same list is expected for GGG/'FF' and CCC/'FR'.
        exact_fragments = [
            'TTTTAATACTGCAGGG',
            'TTTAATACTGCAGGG',
            'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG',
            'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG',
            'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG',
            'TTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGG',
            'TTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGG',
            'TTTGGCCCTGAAGACATTCCAGAGGTGGG',
            'TTTTTGTTGGATAAAAGACCCACAGGG',
            'TTTTGTTGGATAAAAGACCCACAGGG',
            'TTTGTTGGATAAAAGACCCACAGGG',
            'TTTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG',
            'TTTAAATAGCTAATGGCCGCAATAATGGCGCCCTGTGGG',
            'TTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG',
            'TTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG',
            'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG',
            'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG',
            'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG',
            'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGGG',
            'TTTTCAATAATGTGATAGTGACCGTGGG',
            'TTTCAATAATGTGATAGTGACCGTGGG',
        ]
        for stop_primer, mode in (('GGG', 'FF'), ('CCC', 'FR')):
            self.assertEqual(
                list(exact_fragments),
                subject.find_aprox_primers('TTT', stop_primer, mode, 0, 0, 10000))

        # Four-base primers searched with the 4th argument set to 1.
        approx_fragments = [
            'CTTTTAATACTGCAGGG',
            'TTTTAATACTGCAGGG',
            'TTTAATACTGCAGGG',
            'TTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'ATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'CTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTATTGAAAACGAGG',
            'TATTGAAAACGAGG',
            'GTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGG',
            'TTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGG',
            'TATTACCGTTAACGATGTGG',
            'TTCTGTTTGGCCCTGAAGACATTCCAGAGG',
            'TGTTTGGCCCTGAAGACATTCCAGAGG',
            'GTTTGGCCCTGAAGACATTCCAGAGG',
            'TTTGGCCCTGAAGACATTCCAGAGG',
            'GTTTTTGTTGGATAAAAGACCCACAGGG',
            'TTTTTGTTGGATAAAAGACCCACAGGG',
            'TTTTGTTGGATAAAAGACCCACAGGG',
            'TTTGTTGGATAAAAGACCCACAGGG',
            'TTGTTGGATAAAAGACCCACAGGG',
            'TGTTGGATAAAAGACCCACAGGG',
            'TTATTGCGG',
            'TATTGCGG',
            'GTTTTAAATAGCTAATGGCCGCAATAATGGCG',
            'TTTTAAATAGCTAATGGCCGCAATAATGGCG',
            'TTTAAATAGCTAATGGCCGCAATAATGGCG',
            'TCTTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG',
            'CTTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG',
            'TTTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG',
            'TTTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG',
            'TTATCCAACAAAAACGGCCCACCTCTGGAATGTCTTCAGGG',
            'TCTTCAGGG',
            'GTTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'GTTTTCAATAATGTGATAGTGACCGTGG',
            'TTTTCAATAATGTGATAGTGACCGTGG',
            'TTTCAATAATGTGATAGTGACCGTGG',
        ]
        for stop_primer, mode in (('GGGG', 'FF'), ('CCCC', 'FR')):
            self.assertEqual(
                approx_fragments,
                subject.find_aprox_primers('TTTT', stop_primer, mode, 1, 0, 10000))

        # The mode string must be case-insensitive.
        for mode_variant in ('fr', 'Fr', 'fR'):
            self.assertEqual(
                subject.find_aprox_primers('TTTT', 'CCCC', mode_variant, 1, 0, 10000),
                subject.find_aprox_primers('TTTT', 'CCCC', 'FR', 1, 0, 10000))

        # Same search with the 5th argument raised to 60.
        fragments_from_60 = [
            'TTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'ATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'GTTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
        ]
        self.assertEqual(
            fragments_from_60,
            subject.find_aprox_primers('TTTT', 'CCCC', 'FR', 1, 60, 10000))

        # Same search with the last two arguments set to 60 and 65.
        fragments_60_to_65 = [
            'TTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGG',
            'TTTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
            'TTTAGCACTGATAGCCACTTGATCCACATCGTTAACGGTAATATAGCCAGTCCAATGTGAGG',
        ]
        self.assertEqual(
            fragments_60_to_65,
            subject.find_aprox_primers('TTTT', 'CCCC', 'FR', 1, 60, 65))
コード例 #17
0
 def test_setUpSequence(self):
     """A new Sequence keeps the name and seq it was constructed with."""
     header, bases = '>name', 'ACTGactg'
     built = Sequence(header, bases)
     self.assertIsInstance(built, Sequence)
     self.assertEqual(built.name, header)
     self.assertEqual(built.seq, bases)
コード例 #18
0
 def test_contig_str(self):
     """str(Sequence) is the name line, the sequence line and a newline."""
     rendered = str(Sequence('>name', 'ACTGactg'))
     self.assertEqual(rendered, '>name\nACTGactg\n')
コード例 #19
0
    def translate2protein_generic(self):
        """Check Sequence.translate2protein_generic on all three frames.

        The expected value is a list with one tuple per reading-frame
        offset: (leading bases, translated protein, trailing bases).

        NOTE(review): without a 'test_' prefix this method is presumably
        skipped by default unittest discovery - confirm that is intended.
        """
        # Codon table written per residue; '*' marks the three stop codons.
        codons_by_residue = (
            ('A', 'GCA GCC GCG GCT'),
            ('C', 'TGC TGT'),
            ('D', 'GAC GAT'),
            ('E', 'GAA GAG'),
            ('F', 'TTC TTT'),
            ('G', 'GGA GGC GGG GGT'),
            ('H', 'CAC CAT'),
            ('I', 'ATA ATC ATT'),
            ('K', 'AAA AAG'),
            ('L', 'TTA TTG CTA CTC CTG CTT'),
            ('M', 'ATG'),
            ('N', 'AAC AAT'),
            ('P', 'CCA CCC CCG CCT'),
            ('Q', 'CAA CAG'),
            ('R', 'AGA AGG CGA CGC CGG CGT'),
            ('S', 'AGC AGT TCA TCC TCG TCT'),
            ('T', 'ACA ACC ACG ACT'),
            ('V', 'GTA GTC GTG GTT'),
            ('W', 'TGG'),
            ('Y', 'TAC TAT'),
            ('*', 'TAG TGA TAA'),
        )
        tdict = {}
        for residue, codons in codons_by_residue:
            for codon in codons.split():
                tdict[codon] = residue

        dna = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA'

        frame_0 = (
            '',
            'MESAFNTAGALSWHELTTNNTEEAMRFYAEIFGWHFKTVKMPHGHYHIIENEGISIGGITDSLIPTLPSHWTGYITVNDVDQVAISAKKLGGDILFGPEDIPEVGRFCWIKDPQGAIIAAISYLKR*C',
            'AA',
        )
        frame_1 = (
            'A',
            'WNRLLILQGR*VGMNSQPIIPKRPCASMLRFLAGTLKPSKCPTVTITLLKTRGSALAELPTV*SPPFPHIGLAILPLTMWIKWLSVLKNSAVTFCLALKTFQRWAVFVG*KTHRAPLLRPLAI*NVDV',
            'A',
        )
        frame_2 = (
            'AT',
            'GIGF*YCRGVKLA*THNQ*YRRGHALLC*DFWLAL*NRQNAPRSLSHY*KRGDQHWRNYRQFNPHPSLTLDWLYYR*RCGSSGYQC*KTRR*HSVWP*RHSRGGPFLLDKRPTGRHYCGH*LFKTLM*',
            '',
        )

        self.assertEqual(
            [frame_0, frame_1, frame_2],
            Sequence.translate2protein_generic(dna, tdict))
コード例 #20
0
 def test_normalise(self):
     """normalize() strips the leading index and blanks from the sequence."""
     record = Sequence('>name', '   1 ACTG')
     before = str(record)
     self.assertEqual('>name\n   1 ACTG\n', before)
     record.normalize()
     after = str(record)
     self.assertEqual('>name\nACTG\n', after)
コード例 #21
0
 def test_revers(self):
     """reverse() of 'ACTG' renders like a '>rev_name' record with 'CAGT'.

     'CAGT' is the reverse complement of 'ACTG', not just the reversed text.
     """
     forward = Sequence('>name', '   1 ACTG')
     expected = Sequence('>rev_name', 'CAGT')
     expected_text = str(expected)
     self.assertEqual(expected_text, str(forward.reverse()))
コード例 #22
0
 def test_translate2protein(self):
     """Build a Sequence from a long DNA string.

     There are no assertions here; the test only fails if constructing the
     Sequence raises.
     """
     dna = 'ATGGAATCGGCTTTTAATACTGCAGGGGCGTTAAGTTGGCATGAACTCACAACCAATAATACCGAAGAGGCCATGCGCTTCTATGCTGAGATTTTTGGCTGGCACTTTAAAACCGTCAAAATGCCCCACGGTCACTATCACATTATTGAAAACGAGGGGATCAGCATTGGCGGAATTACCGACAGTTTAATCCCCACCCTTCCCTCACATTGGACTGGCTATATTACCGTTAACGATGTGGATCAAGTGGCTATCAGTGCTAAAAAACTCGGCGGTGACATTCTGTTTGGCCCTGAAGACATTCCAGAGGTGGGCCGTTTTTGTTGGATAAAAGACCCACAGGGCGCCATTATTGCGGCCATTAGCTATTTAAAACGTTGATGTAA'
     record = Sequence('>name', dna)
コード例 #23
0
 def load_content(content):
     """Split FASTA-style text into a list of Sequence objects.

     A record starts at a line beginning with '>' and runs up to the next
     such line or the end of the text.  The first line of the record
     (right-stripped) becomes the Sequence name; everything after it, with
     trailing whitespace removed, becomes the sequence text.

     content -- the text to split.
     Returns a list of Sequence objects in input order (empty when no line
     starts with '>').

     NOTE(review): the function takes no self/cls, so it is presumably
     meant to be a staticmethod - confirm at the definition site.
     """
     # Raw string: same pattern text as before, without invalid '\S', '\s'
     # and '\Z' escapes.  The lookahead lets a record end right before the
     # next '>' header without consuming that header.
     record_re = re.compile(r'(?=(^>[\S\s]+?)(^>|\Z))', re.M)
     contigs_list = []
     for record, _terminator in record_re.findall(content):
         # partition() yields an empty body for a header-only record; the
         # old re.sub() left the header text in place as the sequence.
         header, _newline, body = record.rstrip().partition('\n')
         contigs_list.append(Sequence(header.rstrip(), body))
     return contigs_list
コード例 #24
0
 def test_validate_name_string(self):
     """validate_name_string() returns 1 for the name '>name'."""
     record = Sequence('>name', 'ACTGactg')
     verdict = record.validate_name_string('>name')
     self.assertEqual(verdict, 1)