class DNASequenceTests(TestCase):

    def setUp(self):
        self.empty = DNASequence('')
        self.b1 = DNASequence('GATTACA')
        self.b2 = DNASequence(
            'ACCGGTACC', id="test-seq-2",
            description="A test sequence")
        self.b3 = DNASequence(
            'ACCGGUACC', id="bad-seq-1",
            description="Not a DNA sequence")
        self.b4 = DNASequence(
            'MRWSYKVHDBN', id="degen",
            description="All of the degenerate bases")
        self.b5 = DNASequence('.G--ATTAC-A...')

    def test_alphabet(self):
        exp = {
            'A', 'C', 'B', 'D', 'G', 'H', 'K', 'M', 'N', 'S', 'R', 'T', 'W',
            'V', 'Y', 'a', 'c', 'b', 'd', 'g', 'h', 'k', 'm', 'n', 's', 'r',
            't', 'w', 'v', 'y'
        }

        self.assertEqual(self.b1.alphabet(), exp)
        self.assertEqual(DNASequence.alphabet(), exp)

    def test_gap_alphabet(self):
        self.assertEqual(self.b1.gap_alphabet(), set('-.'))

    def test_complement_map(self):
        exp = {
            '-': '-', '.': '.', 'A': 'T', 'C': 'G', 'B': 'V', 'D': 'H',
            'G': 'C', 'H': 'D', 'K': 'M', 'M': 'K', 'N': 'N', 'S': 'S',
            'R': 'Y', 'T': 'A', 'W': 'W', 'V': 'B', 'Y': 'R', 'a': 't',
            'c': 'g', 'b': 'v', 'd': 'h', 'g': 'c', 'h': 'd', 'k': 'm',
            'm': 'k', 'n': 'n', 's': 's', 'r': 'y', 't': 'a', 'w': 'w',
            'v': 'b', 'y': 'r'
        }
        self.assertEqual(self.b1.complement_map(), exp)
        self.assertEqual(DNASequence.complement_map(), exp)

    def test_iupac_standard_characters(self):
        exp = set("ACGTacgt")
        self.assertEqual(self.b1.iupac_standard_characters(), exp)
        self.assertEqual(DNASequence.iupac_standard_characters(), exp)

    def test_iupac_degeneracies(self):
        exp = {
            'B': set(['C', 'T', 'G']), 'D': set(['A', 'T', 'G']),
            'H': set(['A', 'C', 'T']), 'K': set(['T', 'G']),
            'M': set(['A', 'C']), 'N': set(['A', 'C', 'T', 'G']),
            'S': set(['C', 'G']), 'R': set(['A', 'G']), 'W': set(['A', 'T']),
            'V': set(['A', 'C', 'G']), 'Y': set(['C', 'T']),
            'b': set(['c', 't', 'g']), 'd': set(['a', 't', 'g']),
            'h': set(['a', 'c', 't']), 'k': set(['t', 'g']),
            'm': set(['a', 'c']), 'n': set(['a', 'c', 't', 'g']),
            's': set(['c', 'g']), 'r': set(['a', 'g']), 'w': set(['a', 't']),
            'v': set(['a', 'c', 'g']), 'y': set(['c', 't'])
        }
        self.assertEqual(self.b1.iupac_degeneracies(), exp)
        self.assertEqual(DNASequence.iupac_degeneracies(), exp)

    def test_iupac_degenerate_characters(self):
        exp = set(['B', 'D', 'H', 'K', 'M', 'N', 'S', 'R', 'W', 'V', 'Y',
                   'b', 'd', 'h', 'k', 'm', 'n', 's', 'r', 'w', 'v', 'y'])
        self.assertEqual(self.b1.iupac_degenerate_characters(), exp)
        self.assertEqual(DNASequence.iupac_degenerate_characters(), exp)

    def test_iupac_characters(self):
        exp = {
            'A', 'C', 'B', 'D', 'G', 'H', 'K', 'M', 'N', 'S', 'R', 'T', 'W',
            'V', 'Y', 'a', 'c', 'b', 'd', 'g', 'h', 'k', 'm', 'n', 's', 'r',
            't', 'w', 'v', 'y'
        }
        self.assertEqual(self.b1.iupac_characters(), exp)
        self.assertEqual(DNASequence.iupac_characters(), exp)

    def test_complement(self):
        self.assertEqual(self.b1.complement(), DNASequence("CTAATGT"))
        self.assertEqual(self.b2.complement(), DNASequence("TGGCCATGG"))
        self.assertRaises(BiologicalSequenceError, self.b3.complement)
        self.assertEqual(self.b4.complement(), DNASequence("KYWSRMBDHVN"))
        self.assertEqual(self.b5.complement(), DNASequence(".C--TAATG-T..."))

    def test_reverse_complement(self):
        self.assertEqual(self.b1.reverse_complement(), DNASequence("TGTAATC"))
        self.assertEqual(self.b2.reverse_complement(),
                         DNASequence("GGTACCGGT"))
        self.assertRaises(BiologicalSequenceError, self.b3.reverse_complement)
        self.assertEqual(self.b4.reverse_complement(),
                         DNASequence("NVHDBMRSWYK"))

    def test_unsupported_characters(self):
        self.assertEqual(self.b1.unsupported_characters(), set())
        self.assertEqual(self.b2.unsupported_characters(), set())
        self.assertEqual(self.b3.unsupported_characters(), set('U'))
        self.assertEqual(self.b4.unsupported_characters(), set())

    def test_has_unsupported_characters(self):
        self.assertFalse(self.b1.has_unsupported_characters())
        self.assertFalse(self.b2.has_unsupported_characters())
        self.assertTrue(self.b3.has_unsupported_characters())
        self.assertFalse(self.b4.has_unsupported_characters())

    def test_is_reverse_complement(self):
        self.assertFalse(self.b1.is_reverse_complement(self.b1))
        self.assertTrue(
            self.b1.is_reverse_complement(DNASequence('TGTAATC')))
        self.assertTrue(
            self.b4.is_reverse_complement(DNASequence('NVHDBMRSWYK')))

    def test_nondegenerates_invalid(self):
        with self.assertRaises(BiologicalSequenceError):
            list(DNASequence('AZA').nondegenerates())

    def test_nondegenerates_empty(self):
        self.assertEqual(list(self.empty.nondegenerates()), [self.empty])

    def test_nondegenerates_no_degens(self):
        self.assertEqual(list(self.b1.nondegenerates()), [self.b1])

    def test_nondegenerates_all_degens(self):
        # Same chars.
        exp = [DNASequence('CC'), DNASequence('CG'), DNASequence('GC'),
               DNASequence('GG')]
        # Sort based on sequence string, as order is not guaranteed.
        obs = sorted(DNASequence('SS').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

        # Different chars.
        exp = [DNASequence('AC'), DNASequence('AG'), DNASequence('GC'),
               DNASequence('GG')]
        obs = sorted(DNASequence('RS').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

        # Odd number of chars.
        obs = list(DNASequence('NNN').nondegenerates())
        self.assertEqual(len(obs), 4**3)

    def test_nondegenerates_mixed_degens(self):
        exp = [DNASequence('AGC'), DNASequence('AGT'), DNASequence('GGC'),
               DNASequence('GGT')]
        obs = sorted(DNASequence('RGY').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

    def test_nondegenerates_gap_mixed_case(self):
        exp = [DNASequence('-A.a'), DNASequence('-A.c'),
               DNASequence('-C.a'), DNASequence('-C.c')]
        obs = sorted(DNASequence('-M.m').nondegenerates(), key=str)
        self.assertEqual(obs, exp)
Beispiel #2
0
class DNASequenceTests(TestCase):
    def setUp(self):
        self.empty = DNASequence('')
        self.b1 = DNASequence('GATTACA')
        self.b2 = DNASequence('ACCGGTACC',
                              id="test-seq-2",
                              description="A test sequence")
        self.b3 = DNASequence('ACCGGUACC',
                              id="bad-seq-1",
                              description="Not a DNA sequence")
        self.b4 = DNASequence('MRWSYKVHDBN',
                              id="degen",
                              description="All of the degenerate bases")
        self.b5 = DNASequence('.G--ATTAC-A...')

    def test_alphabet(self):
        exp = {
            'A', 'C', 'B', 'D', 'G', 'H', 'K', 'M', 'N', 'S', 'R', 'T', 'W',
            'V', 'Y', 'a', 'c', 'b', 'd', 'g', 'h', 'k', 'm', 'n', 's', 'r',
            't', 'w', 'v', 'y'
        }

        self.assertEqual(self.b1.alphabet(), exp)
        self.assertEqual(DNASequence.alphabet(), exp)

    def test_gap_alphabet(self):
        self.assertEqual(self.b1.gap_alphabet(), set('-.'))

    def test_complement_map(self):
        exp = {
            '-': '-',
            '.': '.',
            'A': 'T',
            'C': 'G',
            'B': 'V',
            'D': 'H',
            'G': 'C',
            'H': 'D',
            'K': 'M',
            'M': 'K',
            'N': 'N',
            'S': 'S',
            'R': 'Y',
            'T': 'A',
            'W': 'W',
            'V': 'B',
            'Y': 'R',
            'a': 't',
            'c': 'g',
            'b': 'v',
            'd': 'h',
            'g': 'c',
            'h': 'd',
            'k': 'm',
            'm': 'k',
            'n': 'n',
            's': 's',
            'r': 'y',
            't': 'a',
            'w': 'w',
            'v': 'b',
            'y': 'r'
        }
        self.assertEqual(self.b1.complement_map(), exp)
        self.assertEqual(DNASequence.complement_map(), exp)

    def test_iupac_standard_characters(self):
        exp = set("ACGTacgt")
        self.assertEqual(self.b1.iupac_standard_characters(), exp)
        self.assertEqual(DNASequence.iupac_standard_characters(), exp)

    def test_iupac_degeneracies(self):
        exp = {
            'B': set(['C', 'T', 'G']),
            'D': set(['A', 'T', 'G']),
            'H': set(['A', 'C', 'T']),
            'K': set(['T', 'G']),
            'M': set(['A', 'C']),
            'N': set(['A', 'C', 'T', 'G']),
            'S': set(['C', 'G']),
            'R': set(['A', 'G']),
            'W': set(['A', 'T']),
            'V': set(['A', 'C', 'G']),
            'Y': set(['C', 'T']),
            'b': set(['c', 't', 'g']),
            'd': set(['a', 't', 'g']),
            'h': set(['a', 'c', 't']),
            'k': set(['t', 'g']),
            'm': set(['a', 'c']),
            'n': set(['a', 'c', 't', 'g']),
            's': set(['c', 'g']),
            'r': set(['a', 'g']),
            'w': set(['a', 't']),
            'v': set(['a', 'c', 'g']),
            'y': set(['c', 't'])
        }
        self.assertEqual(self.b1.iupac_degeneracies(), exp)
        self.assertEqual(DNASequence.iupac_degeneracies(), exp)

    def test_iupac_degenerate_characters(self):
        exp = set([
            'B', 'D', 'H', 'K', 'M', 'N', 'S', 'R', 'W', 'V', 'Y', 'b', 'd',
            'h', 'k', 'm', 'n', 's', 'r', 'w', 'v', 'y'
        ])
        self.assertEqual(self.b1.iupac_degenerate_characters(), exp)
        self.assertEqual(DNASequence.iupac_degenerate_characters(), exp)

    def test_iupac_characters(self):
        exp = {
            'A', 'C', 'B', 'D', 'G', 'H', 'K', 'M', 'N', 'S', 'R', 'T', 'W',
            'V', 'Y', 'a', 'c', 'b', 'd', 'g', 'h', 'k', 'm', 'n', 's', 'r',
            't', 'w', 'v', 'y'
        }
        self.assertEqual(self.b1.iupac_characters(), exp)
        self.assertEqual(DNASequence.iupac_characters(), exp)

    def test_complement(self):
        self.assertEqual(self.b1.complement(), DNASequence("CTAATGT"))
        self.assertEqual(self.b2.complement(), DNASequence("TGGCCATGG"))
        self.assertRaises(BiologicalSequenceError, self.b3.complement)
        self.assertEqual(self.b4.complement(), DNASequence("KYWSRMBDHVN"))
        self.assertEqual(self.b5.complement(), DNASequence(".C--TAATG-T..."))

    def test_reverse_complement(self):
        self.assertEqual(self.b1.reverse_complement(), DNASequence("TGTAATC"))
        self.assertEqual(self.b2.reverse_complement(),
                         DNASequence("GGTACCGGT"))
        self.assertRaises(BiologicalSequenceError, self.b3.reverse_complement)
        self.assertEqual(self.b4.reverse_complement(),
                         DNASequence("NVHDBMRSWYK"))

    def test_unsupported_characters(self):
        self.assertEqual(self.b1.unsupported_characters(), set())
        self.assertEqual(self.b2.unsupported_characters(), set())
        self.assertEqual(self.b3.unsupported_characters(), set('U'))
        self.assertEqual(self.b4.unsupported_characters(), set())

    def test_has_unsupported_characters(self):
        self.assertFalse(self.b1.has_unsupported_characters())
        self.assertFalse(self.b2.has_unsupported_characters())
        self.assertTrue(self.b3.has_unsupported_characters())
        self.assertFalse(self.b4.has_unsupported_characters())

    def test_is_reverse_complement(self):
        self.assertFalse(self.b1.is_reverse_complement(self.b1))
        self.assertTrue(self.b1.is_reverse_complement(DNASequence('TGTAATC')))
        self.assertTrue(
            self.b4.is_reverse_complement(DNASequence('NVHDBMRSWYK')))

    def test_nondegenerates_invalid(self):
        with self.assertRaises(BiologicalSequenceError):
            list(DNASequence('AZA').nondegenerates())

    def test_nondegenerates_empty(self):
        self.assertEqual(list(self.empty.nondegenerates()), [self.empty])

    def test_nondegenerates_no_degens(self):
        self.assertEqual(list(self.b1.nondegenerates()), [self.b1])

    def test_nondegenerates_all_degens(self):
        # Same chars.
        exp = [
            DNASequence('CC'),
            DNASequence('CG'),
            DNASequence('GC'),
            DNASequence('GG')
        ]
        # Sort based on sequence string, as order is not guaranteed.
        obs = sorted(DNASequence('SS').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

        # Different chars.
        exp = [
            DNASequence('AC'),
            DNASequence('AG'),
            DNASequence('GC'),
            DNASequence('GG')
        ]
        obs = sorted(DNASequence('RS').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

        # Odd number of chars.
        obs = list(DNASequence('NNN').nondegenerates())
        self.assertEqual(len(obs), 4**3)

    def test_nondegenerates_mixed_degens(self):
        exp = [
            DNASequence('AGC'),
            DNASequence('AGT'),
            DNASequence('GGC'),
            DNASequence('GGT')
        ]
        obs = sorted(DNASequence('RGY').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

    def test_nondegenerates_gap_mixed_case(self):
        exp = [
            DNASequence('-A.a'),
            DNASequence('-A.c'),
            DNASequence('-C.a'),
            DNASequence('-C.c')
        ]
        obs = sorted(DNASequence('-M.m').nondegenerates(), key=str)
        self.assertEqual(obs, exp)