def test_iupac_degeneracies(self):
        exp = {
            # upper
            'B': set(['C', 'U', 'T', 'G']), 'D': set(['A', 'U', 'T', 'G']),
            'H': set(['A', 'C', 'U', 'T']), 'K': set(['U', 'T', 'G']),
            'M': set(['A', 'C']), 'N': set(['A', 'C', 'U', 'T', 'G']),
            'S': set(['C', 'G']), 'R': set(['A', 'G']),
            'W': set(['A', 'U', 'T']), 'V': set(['A', 'C', 'G']),
            'Y': set(['C', 'U', 'T']),
            # lower
            'b': set(['c', 'u', 't', 'g']), 'd': set(['a', 'u', 't', 'g']),
            'h': set(['a', 'c', 'u', 't']), 'k': set(['u', 't', 'g']),
            'm': set(['a', 'c']), 'n': set(['a', 'c', 'u', 't', 'g']),
            's': set(['c', 'g']), 'r': set(['a', 'g']),
            'w': set(['a', 'u', 't']), 'v': set(['a', 'c', 'g']),
            'y': set(['c', 'u', 't'])
        }
        self.assertEqual(self.b1.iupac_degeneracies(), exp)
        self.assertEqual(NucleotideSequence.iupac_degeneracies(), exp)

        # Test that we can modify a copy of the mapping without altering the
        # canonical representation.
        degen = NucleotideSequence.iupac_degeneracies()
        degen.update({'V': set("BRO"), 'Z': set("ZORRO")})
        self.assertNotEqual(degen, exp)
        self.assertEqual(NucleotideSequence.iupac_degeneracies(), exp)
Esempio n. 2
0
    def test_iupac_degeneracies(self):
        exp = {
            # upper
            'B': set(['C', 'U', 'T', 'G']),
            'D': set(['A', 'U', 'T', 'G']),
            'H': set(['A', 'C', 'U', 'T']),
            'K': set(['U', 'T', 'G']),
            'M': set(['A', 'C']),
            'N': set(['A', 'C', 'U', 'T', 'G']),
            'S': set(['C', 'G']),
            'R': set(['A', 'G']),
            'W': set(['A', 'U', 'T']),
            'V': set(['A', 'C', 'G']),
            'Y': set(['C', 'U', 'T']),
            # lower
            'b': set(['c', 'u', 't', 'g']),
            'd': set(['a', 'u', 't', 'g']),
            'h': set(['a', 'c', 'u', 't']),
            'k': set(['u', 't', 'g']),
            'm': set(['a', 'c']),
            'n': set(['a', 'c', 'u', 't', 'g']),
            's': set(['c', 'g']),
            'r': set(['a', 'g']),
            'w': set(['a', 'u', 't']),
            'v': set(['a', 'c', 'g']),
            'y': set(['c', 'u', 't'])
        }
        self.assertEqual(self.b1.iupac_degeneracies(), exp)
        self.assertEqual(NucleotideSequence.iupac_degeneracies(), exp)

        # Test that we can modify a copy of the mapping without altering the
        # canonical representation.
        degen = NucleotideSequence.iupac_degeneracies()
        degen.update({'V': set("BRO"), 'Z': set("ZORRO")})
        self.assertNotEqual(degen, exp)
        self.assertEqual(NucleotideSequence.iupac_degeneracies(), exp)
class NucelotideSequenceTests(TestCase):

    def setUp(self):
        self.empty = NucleotideSequence('')
        self.b1 = NucleotideSequence('GATTACA')
        self.b2 = NucleotideSequence(
            'ACCGGUACC', id="test-seq-2",
            description="A test sequence")

    def test_alphabet(self):
        exp = {
            'A', 'C', 'B', 'D', 'G', 'H', 'K', 'M', 'N', 'S', 'R', 'U', 'T',
            'W', 'V', 'Y', 'a', 'c', 'b', 'd', 'g', 'h', 'k', 'm', 'n', 's',
            'r', 'u', 't', 'w', 'v', 'y'
        }

        # Test calling from an instance and purely static context.
        self.assertEqual(self.b1.alphabet(), exp)
        self.assertEqual(NucleotideSequence.alphabet(), exp)

    def test_gap_alphabet(self):
        self.assertEqual(self.b1.gap_alphabet(), set('-.'))

    def test_complement_map(self):
        exp = {}
        self.assertEqual(self.b1.complement_map(), exp)
        self.assertEqual(NucleotideSequence.complement_map(), exp)

    def test_iupac_standard_characters(self):
        exp = set("ACGTUacgtu")
        self.assertEqual(self.b1.iupac_standard_characters(), exp)
        self.assertEqual(NucleotideSequence.iupac_standard_characters(), exp)

    def test_iupac_degeneracies(self):
        exp = {
            # upper
            'B': set(['C', 'U', 'T', 'G']), 'D': set(['A', 'U', 'T', 'G']),
            'H': set(['A', 'C', 'U', 'T']), 'K': set(['U', 'T', 'G']),
            'M': set(['A', 'C']), 'N': set(['A', 'C', 'U', 'T', 'G']),
            'S': set(['C', 'G']), 'R': set(['A', 'G']),
            'W': set(['A', 'U', 'T']), 'V': set(['A', 'C', 'G']),
            'Y': set(['C', 'U', 'T']),
            # lower
            'b': set(['c', 'u', 't', 'g']), 'd': set(['a', 'u', 't', 'g']),
            'h': set(['a', 'c', 'u', 't']), 'k': set(['u', 't', 'g']),
            'm': set(['a', 'c']), 'n': set(['a', 'c', 'u', 't', 'g']),
            's': set(['c', 'g']), 'r': set(['a', 'g']),
            'w': set(['a', 'u', 't']), 'v': set(['a', 'c', 'g']),
            'y': set(['c', 'u', 't'])
        }
        self.assertEqual(self.b1.iupac_degeneracies(), exp)
        self.assertEqual(NucleotideSequence.iupac_degeneracies(), exp)

        # Test that we can modify a copy of the mapping without altering the
        # canonical representation.
        degen = NucleotideSequence.iupac_degeneracies()
        degen.update({'V': set("BRO"), 'Z': set("ZORRO")})
        self.assertNotEqual(degen, exp)
        self.assertEqual(NucleotideSequence.iupac_degeneracies(), exp)

    def test_iupac_degenerate_characters(self):
        exp = set(['B', 'D', 'H', 'K', 'M', 'N', 'S', 'R', 'W', 'V', 'Y',
                   'b', 'd', 'h', 'k', 'm', 'n', 's', 'r', 'w', 'v', 'y'])
        self.assertEqual(self.b1.iupac_degenerate_characters(), exp)
        self.assertEqual(NucleotideSequence.iupac_degenerate_characters(), exp)

    def test_iupac_characters(self):
        exp = {
            'A', 'C', 'B', 'D', 'G', 'H', 'K', 'M', 'N', 'S', 'R', 'U', 'T',
            'W', 'V', 'Y', 'a', 'c', 'b', 'd', 'g', 'h', 'k', 'm', 'n', 's',
            'r', 'u', 't', 'w', 'v', 'y'
        }

        self.assertEqual(self.b1.iupac_characters(), exp)
        self.assertEqual(NucleotideSequence.iupac_characters(), exp)

    def test_complement(self):
        self.assertRaises(BiologicalSequenceError,
                          self.b1.complement)

    def test_reverse_complement(self):
        self.assertRaises(BiologicalSequenceError,
                          self.b1.reverse_complement)

    def test_is_reverse_complement(self):
        self.assertRaises(BiologicalSequenceError,
                          self.b1.is_reverse_complement, self.b1)

    def test_nondegenerates_invalid(self):
        with self.assertRaises(BiologicalSequenceError):
            list(NucleotideSequence('AZA').nondegenerates())

    def test_nondegenerates_empty(self):
        self.assertEqual(list(self.empty.nondegenerates()), [self.empty])

    def test_nondegenerates_no_degens(self):
        self.assertEqual(list(self.b1.nondegenerates()), [self.b1])

    def test_nondegenerates_all_degens(self):
        # Same chars.
        exp = [NucleotideSequence('CC'), NucleotideSequence('CG'),
               NucleotideSequence('GC'), NucleotideSequence('GG')]
        # Sort based on sequence string, as order is not guaranteed.
        obs = sorted(NucleotideSequence('SS').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

        # Different chars.
        exp = [NucleotideSequence('AC'), NucleotideSequence('AG'),
               NucleotideSequence('GC'), NucleotideSequence('GG')]
        obs = sorted(NucleotideSequence('RS').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

        # Odd number of chars.
        obs = list(NucleotideSequence('NNN').nondegenerates())
        self.assertEqual(len(obs), 5**3)

    def test_nondegenerates_mixed_degens(self):
        exp = [NucleotideSequence('AGC'), NucleotideSequence('AGT'),
               NucleotideSequence('AGU'), NucleotideSequence('GGC'),
               NucleotideSequence('GGT'), NucleotideSequence('GGU')]
        obs = sorted(NucleotideSequence('RGY').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

    def test_nondegenerates_gap_mixed_case(self):
        exp = [NucleotideSequence('-A.a'), NucleotideSequence('-A.c'),
               NucleotideSequence('-C.a'), NucleotideSequence('-C.c')]
        obs = sorted(NucleotideSequence('-M.m').nondegenerates(), key=str)
        self.assertEqual(obs, exp)
Esempio n. 4
0
class NucelotideSequenceTests(TestCase):
    def setUp(self):
        self.empty = NucleotideSequence('')
        self.b1 = NucleotideSequence('GATTACA')
        self.b2 = NucleotideSequence('ACCGGUACC',
                                     id="test-seq-2",
                                     description="A test sequence")

    def test_alphabet(self):
        exp = {
            'A', 'C', 'B', 'D', 'G', 'H', 'K', 'M', 'N', 'S', 'R', 'U', 'T',
            'W', 'V', 'Y', 'a', 'c', 'b', 'd', 'g', 'h', 'k', 'm', 'n', 's',
            'r', 'u', 't', 'w', 'v', 'y'
        }

        # Test calling from an instance and purely static context.
        self.assertEqual(self.b1.alphabet(), exp)
        self.assertEqual(NucleotideSequence.alphabet(), exp)

    def test_gap_alphabet(self):
        self.assertEqual(self.b1.gap_alphabet(), set('-.'))

    def test_complement_map(self):
        exp = {}
        self.assertEqual(self.b1.complement_map(), exp)
        self.assertEqual(NucleotideSequence.complement_map(), exp)

    def test_iupac_standard_characters(self):
        exp = set("ACGTUacgtu")
        self.assertEqual(self.b1.iupac_standard_characters(), exp)
        self.assertEqual(NucleotideSequence.iupac_standard_characters(), exp)

    def test_iupac_degeneracies(self):
        exp = {
            # upper
            'B': set(['C', 'U', 'T', 'G']),
            'D': set(['A', 'U', 'T', 'G']),
            'H': set(['A', 'C', 'U', 'T']),
            'K': set(['U', 'T', 'G']),
            'M': set(['A', 'C']),
            'N': set(['A', 'C', 'U', 'T', 'G']),
            'S': set(['C', 'G']),
            'R': set(['A', 'G']),
            'W': set(['A', 'U', 'T']),
            'V': set(['A', 'C', 'G']),
            'Y': set(['C', 'U', 'T']),
            # lower
            'b': set(['c', 'u', 't', 'g']),
            'd': set(['a', 'u', 't', 'g']),
            'h': set(['a', 'c', 'u', 't']),
            'k': set(['u', 't', 'g']),
            'm': set(['a', 'c']),
            'n': set(['a', 'c', 'u', 't', 'g']),
            's': set(['c', 'g']),
            'r': set(['a', 'g']),
            'w': set(['a', 'u', 't']),
            'v': set(['a', 'c', 'g']),
            'y': set(['c', 'u', 't'])
        }
        self.assertEqual(self.b1.iupac_degeneracies(), exp)
        self.assertEqual(NucleotideSequence.iupac_degeneracies(), exp)

        # Test that we can modify a copy of the mapping without altering the
        # canonical representation.
        degen = NucleotideSequence.iupac_degeneracies()
        degen.update({'V': set("BRO"), 'Z': set("ZORRO")})
        self.assertNotEqual(degen, exp)
        self.assertEqual(NucleotideSequence.iupac_degeneracies(), exp)

    def test_iupac_degenerate_characters(self):
        exp = set([
            'B', 'D', 'H', 'K', 'M', 'N', 'S', 'R', 'W', 'V', 'Y', 'b', 'd',
            'h', 'k', 'm', 'n', 's', 'r', 'w', 'v', 'y'
        ])
        self.assertEqual(self.b1.iupac_degenerate_characters(), exp)
        self.assertEqual(NucleotideSequence.iupac_degenerate_characters(), exp)

    def test_iupac_characters(self):
        exp = {
            'A', 'C', 'B', 'D', 'G', 'H', 'K', 'M', 'N', 'S', 'R', 'U', 'T',
            'W', 'V', 'Y', 'a', 'c', 'b', 'd', 'g', 'h', 'k', 'm', 'n', 's',
            'r', 'u', 't', 'w', 'v', 'y'
        }

        self.assertEqual(self.b1.iupac_characters(), exp)
        self.assertEqual(NucleotideSequence.iupac_characters(), exp)

    def test_complement(self):
        self.assertRaises(BiologicalSequenceError, self.b1.complement)

    def test_reverse_complement(self):
        self.assertRaises(BiologicalSequenceError, self.b1.reverse_complement)

    def test_is_reverse_complement(self):
        self.assertRaises(BiologicalSequenceError,
                          self.b1.is_reverse_complement, self.b1)

    def test_nondegenerates_invalid(self):
        with self.assertRaises(BiologicalSequenceError):
            list(NucleotideSequence('AZA').nondegenerates())

    def test_nondegenerates_empty(self):
        self.assertEqual(list(self.empty.nondegenerates()), [self.empty])

    def test_nondegenerates_no_degens(self):
        self.assertEqual(list(self.b1.nondegenerates()), [self.b1])

    def test_nondegenerates_all_degens(self):
        # Same chars.
        exp = [
            NucleotideSequence('CC'),
            NucleotideSequence('CG'),
            NucleotideSequence('GC'),
            NucleotideSequence('GG')
        ]
        # Sort based on sequence string, as order is not guaranteed.
        obs = sorted(NucleotideSequence('SS').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

        # Different chars.
        exp = [
            NucleotideSequence('AC'),
            NucleotideSequence('AG'),
            NucleotideSequence('GC'),
            NucleotideSequence('GG')
        ]
        obs = sorted(NucleotideSequence('RS').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

        # Odd number of chars.
        obs = list(NucleotideSequence('NNN').nondegenerates())
        self.assertEqual(len(obs), 5**3)

    def test_nondegenerates_mixed_degens(self):
        exp = [
            NucleotideSequence('AGC'),
            NucleotideSequence('AGT'),
            NucleotideSequence('AGU'),
            NucleotideSequence('GGC'),
            NucleotideSequence('GGT'),
            NucleotideSequence('GGU')
        ]
        obs = sorted(NucleotideSequence('RGY').nondegenerates(), key=str)
        self.assertEqual(obs, exp)

    def test_nondegenerates_gap_mixed_case(self):
        exp = [
            NucleotideSequence('-A.a'),
            NucleotideSequence('-A.c'),
            NucleotideSequence('-C.a'),
            NucleotideSequence('-C.c')
        ]
        obs = sorted(NucleotideSequence('-M.m').nondegenerates(), key=str)
        self.assertEqual(obs, exp)