def test_complement_map(self):
    # The expected map pairs each character with its complement; gap
    # characters map to themselves. The lower-case half mirrors the
    # upper-case half, so it is derived rather than spelled out.
    upper_pairs = {
        'A': 'U', 'C': 'G', 'B': 'V', 'D': 'H', 'G': 'C',
        'H': 'D', 'K': 'M', 'M': 'K', 'N': 'N', 'S': 'S',
        'R': 'Y', 'U': 'A', 'W': 'W', 'V': 'B', 'Y': 'R',
    }
    expected = {'-': '-', '.': '.'}
    expected.update(upper_pairs)
    expected.update(
        (base.lower(), partner.lower())
        for base, partner in upper_pairs.items()
    )

    # The map must be reachable from an instance and from the class itself.
    for target in (self.b1, RNASequence):
        self.assertEqual(target.complement_map(), expected)
class RNASequenceTests(TestCase):
    """Unit tests for RNASequence, run against the fixtures built in setUp."""

    def setUp(self):
        # Fixtures shared by every test in this class.
        self.empty = RNASequence('')
        self.b1 = RNASequence('GAUUACA')
        self.b2 = RNASequence(
            'ACCGGUACC',
            id="test-seq-2",
            description="A test sequence")
        # Contains a 'T'; the tests below expect it to be reported as
        # unsupported and to make complement()/reverse_complement() raise.
        self.b3 = RNASequence(
            'ACCGGTACC',
            id="bad-seq-1",
            description="Not a RNA sequence")
        self.b4 = RNASequence(
            'MRWSYKVHDBN',
            id="degen",
            description="All of the degenerate bases")
        # Sequence containing both gap characters ('-' and '.').
        self.b5 = RNASequence('.G--AUUAC-A...')

    def test_alphabet(self):
        # Fifteen upper-case characters plus their lower-case counterparts,
        # checked through an instance and through the class.
        expected = set('ACBDGHKMNSRUWVY' 'acbdghkmnsruwvy')
        for target in (self.b1, RNASequence):
            self.assertEqual(target.alphabet(), expected)

    def test_gap_alphabet(self):
        expected_gaps = {'-', '.'}
        self.assertEqual(self.b1.gap_alphabet(), expected_gaps)

    def test_complement_map(self):
        # Gap characters map to themselves; the lower-case entries mirror
        # the upper-case ones, so they are derived from them.
        upper_pairs = {
            'A': 'U', 'C': 'G', 'B': 'V', 'D': 'H', 'G': 'C',
            'H': 'D', 'K': 'M', 'M': 'K', 'N': 'N', 'S': 'S',
            'R': 'Y', 'U': 'A', 'W': 'W', 'V': 'B', 'Y': 'R',
        }
        expected = {'-': '-', '.': '.'}
        expected.update(upper_pairs)
        expected.update(
            (base.lower(), partner.lower())
            for base, partner in upper_pairs.items()
        )
        for target in (self.b1, RNASequence):
            self.assertEqual(target.complement_map(), expected)

    def test_iupac_standard_characters(self):
        expected = {'A', 'C', 'G', 'U', 'a', 'c', 'g', 'u'}
        for target in (self.b1, RNASequence):
            self.assertEqual(target.iupac_standard_characters(), expected)

    def test_iupac_degeneracies(self):
        # Each degenerate code maps to the set of characters it stands for;
        # the string values are expanded into sets below, in both cases.
        upper_expansions = {
            'B': 'CUG', 'D': 'AUG', 'H': 'ACU', 'K': 'UG', 'M': 'AC',
            'N': 'ACUG', 'S': 'CG', 'R': 'AG', 'W': 'AU', 'V': 'ACG',
            'Y': 'CU',
        }
        expected = {}
        for code, bases in upper_expansions.items():
            expected[code] = set(bases)
            expected[code.lower()] = set(bases.lower())
        for target in (self.b1, RNASequence):
            self.assertEqual(target.iupac_degeneracies(), expected)

    def test_iupac_degenerate_characters(self):
        degenerate_upper = 'BDHKMNSRWVY'
        expected = set(degenerate_upper) | set(degenerate_upper.lower())
        for target in (self.b1, RNASequence):
            self.assertEqual(target.iupac_degenerate_characters(), expected)

    def test_iupac_characters(self):
        expected = set('ACBDGHKMNSRUWVY' 'acbdghkmnsruwvy')
        for target in (self.b1, RNASequence):
            self.assertEqual(target.iupac_characters(), expected)

    def test_complement(self):
        for seq, complemented in ((self.b1, "CUAAUGU"),
                                  (self.b2, "UGGCCAUGG")):
            self.assertEqual(seq.complement(), RNASequence(complemented))
        # b3 holds a 'T', so complementing it is expected to fail.
        with self.assertRaises(BiologicalSequenceError):
            self.b3.complement()
        for seq, complemented in ((self.b4, "KYWSRMBDHVN"),
                                  (self.b5, ".C--UAAUG-U...")):
            self.assertEqual(seq.complement(), RNASequence(complemented))

    def test_reverse_complement(self):
        for seq, revcomp in ((self.b1, "UGUAAUC"),
                             (self.b2, "GGUACCGGU")):
            self.assertEqual(seq.reverse_complement(), RNASequence(revcomp))
        # b3 holds a 'T', so reverse-complementing it is expected to fail.
        with self.assertRaises(BiologicalSequenceError):
            self.b3.reverse_complement()
        self.assertEqual(self.b4.reverse_complement(),
                         RNASequence("NVHDBMRSWYK"))

    def test_unsupported_characters(self):
        # Only b3 (with its 'T') should report anything.
        expectations = (
            (self.b1, set()),
            (self.b2, set()),
            (self.b3, {'T'}),
            (self.b4, set()),
        )
        for seq, unsupported in expectations:
            self.assertEqual(seq.unsupported_characters(), unsupported)

    def test_has_unsupported_characters(self):
        for seq in (self.b1, self.b2):
            self.assertFalse(seq.has_unsupported_characters())
        self.assertTrue(self.b3.has_unsupported_characters())
        self.assertFalse(self.b4.has_unsupported_characters())

    def test_is_reverse_complement(self):
        # b1 is not expected to be its own reverse complement.
        self.assertFalse(self.b1.is_reverse_complement(self.b1))
        b1_revcomp = RNASequence('UGUAAUC')
        self.assertTrue(self.b1.is_reverse_complement(b1_revcomp))
        b4_revcomp = RNASequence('NVHDBMRSWYK')
        self.assertTrue(self.b4.is_reverse_complement(b4_revcomp))

    def test_nondegenerates_invalid(self):
        # Construction stays inside the context so the test passes no matter
        # which of the two steps raises.
        with self.assertRaises(BiologicalSequenceError):
            invalid = RNASequence('AZA')
            list(invalid.nondegenerates())

    def test_nondegenerates_empty(self):
        observed = list(self.empty.nondegenerates())
        self.assertEqual(observed, [self.empty])

    def test_nondegenerates_no_degens(self):
        observed = list(self.b1.nondegenerates())
        self.assertEqual(observed, [self.b1])

    def test_nondegenerates_all_degens(self):
        # Same chars.
        expected = [RNASequence(text) for text in ('CC', 'CG', 'GC', 'GG')]
        # Sort based on sequence string, as order is not guaranteed.
        observed = sorted(RNASequence('SS').nondegenerates(), key=str)
        self.assertEqual(observed, expected)

        # Different chars.
        expected = [RNASequence(text) for text in ('AC', 'AG', 'GC', 'GG')]
        observed = sorted(RNASequence('RS').nondegenerates(), key=str)
        self.assertEqual(observed, expected)

        # Odd number of chars.
        expansions = list(RNASequence('NNN').nondegenerates())
        self.assertEqual(len(expansions), 4**3)

    def test_nondegenerates_mixed_degens(self):
        expected = [RNASequence(text)
                    for text in ('AGC', 'AGU', 'GGC', 'GGU')]
        observed = sorted(RNASequence('RGY').nondegenerates(), key=str)
        self.assertEqual(observed, expected)

    def test_nondegenerates_gap_mixed_case(self):
        expected = [RNASequence(text)
                    for text in ('-A.a', '-A.c', '-C.a', '-C.c')]
        observed = sorted(RNASequence('-M.m').nondegenerates(), key=str)
        self.assertEqual(observed, expected)
class RNASequenceTests(TestCase):
    """Behavioural checks for RNASequence driven by the setUp fixtures."""

    def setUp(self):
        # Bare sequences and sequences carrying id/description metadata.
        self.empty = RNASequence('')
        self.b1 = RNASequence('GAUUACA')
        self.b2 = RNASequence(
            'ACCGGUACC', id="test-seq-2", description="A test sequence")
        # The 'T' in this fixture is what the unsupported-character and
        # complement tests rely on.
        self.b3 = RNASequence(
            'ACCGGTACC', id="bad-seq-1", description="Not a RNA sequence")
        self.b4 = RNASequence(
            'MRWSYKVHDBN', id="degen",
            description="All of the degenerate bases")
        # Fixture mixing residues with '-' and '.' gap characters.
        self.b5 = RNASequence('.G--AUUAC-A...')

    def _assert_instance_and_class(self, method_name, expected):
        # Call the named zero-argument method on the b1 fixture and then on
        # the RNASequence class, comparing each result with `expected`.
        self.assertEqual(getattr(self.b1, method_name)(), expected)
        self.assertEqual(getattr(RNASequence, method_name)(), expected)

    def _sorted_nondegenerates(self, text):
        # Expand `text` and sort by sequence string, as the order in which
        # nondegenerates() yields results is not guaranteed.
        return sorted(RNASequence(text).nondegenerates(), key=str)

    def test_alphabet(self):
        upper = 'ACBDGHKMNSRUWVY'
        self._assert_instance_and_class(
            'alphabet', set(upper + upper.lower()))

    def test_gap_alphabet(self):
        observed = self.b1.gap_alphabet()
        self.assertEqual(observed, set(['-', '.']))

    def test_complement_map(self):
        # Characters and their complements, position by position; the
        # lower-case and gap entries are appended to the same table.
        bases = 'ACBDGHKMNSRUWVY'
        partners = 'UGVHCDMKNSYAWBR'
        expected = dict(zip(bases, partners))
        expected.update(zip(bases.lower(), partners.lower()))
        expected.update(zip('-.', '-.'))
        self._assert_instance_and_class('complement_map', expected)

    def test_iupac_standard_characters(self):
        standard = 'ACGU'
        self._assert_instance_and_class(
            'iupac_standard_characters', set(standard + standard.lower()))

    def test_iupac_degeneracies(self):
        # Degenerate code -> characters it expands to (given as a string
        # here and converted to a set for the comparison).
        upper = {
            'B': 'CUG',
            'D': 'AUG',
            'H': 'ACU',
            'K': 'UG',
            'M': 'AC',
            'N': 'ACUG',
            'S': 'CG',
            'R': 'AG',
            'W': 'AU',
            'V': 'ACG',
            'Y': 'CU',
        }
        expected = {code: set(chars) for code, chars in upper.items()}
        expected.update(
            (code.lower(), set(chars.lower()))
            for code, chars in upper.items()
        )
        self._assert_instance_and_class('iupac_degeneracies', expected)

    def test_iupac_degenerate_characters(self):
        upper = 'BDHKMNSRWVY'
        self._assert_instance_and_class(
            'iupac_degenerate_characters', set(upper + upper.lower()))

    def test_iupac_characters(self):
        upper = 'ACBDGHKMNSRUWVY'
        self._assert_instance_and_class(
            'iupac_characters', set(upper + upper.lower()))

    def test_complement(self):
        observed = self.b1.complement()
        self.assertEqual(observed, RNASequence("CUAAUGU"))

        observed = self.b2.complement()
        self.assertEqual(observed, RNASequence("UGGCCAUGG"))

        # The 'T' in b3 is expected to make complement() raise.
        self.assertRaises(BiologicalSequenceError, self.b3.complement)

        observed = self.b4.complement()
        self.assertEqual(observed, RNASequence("KYWSRMBDHVN"))

        # Gap characters are expected to come through unchanged.
        observed = self.b5.complement()
        self.assertEqual(observed, RNASequence(".C--UAAUG-U..."))

    def test_reverse_complement(self):
        observed = self.b1.reverse_complement()
        self.assertEqual(observed, RNASequence("UGUAAUC"))

        observed = self.b2.reverse_complement()
        self.assertEqual(observed, RNASequence("GGUACCGGU"))

        # The 'T' in b3 is expected to make reverse_complement() raise.
        self.assertRaises(BiologicalSequenceError, self.b3.reverse_complement)

        observed = self.b4.reverse_complement()
        self.assertEqual(observed, RNASequence("NVHDBMRSWYK"))

    def test_unsupported_characters(self):
        nothing = set()
        self.assertEqual(self.b1.unsupported_characters(), nothing)
        self.assertEqual(self.b2.unsupported_characters(), nothing)
        self.assertEqual(self.b3.unsupported_characters(), {'T'})
        self.assertEqual(self.b4.unsupported_characters(), nothing)

    def test_has_unsupported_characters(self):
        # Fixture paired with the assertion its flag must satisfy.
        checks = (
            (self.assertFalse, self.b1),
            (self.assertFalse, self.b2),
            (self.assertTrue, self.b3),
            (self.assertFalse, self.b4),
        )
        for check, seq in checks:
            check(seq.has_unsupported_characters())

    def test_is_reverse_complement(self):
        self.assertFalse(self.b1.is_reverse_complement(self.b1))
        for seq, partner in ((self.b1, 'UGUAAUC'),
                             (self.b4, 'NVHDBMRSWYK')):
            self.assertTrue(seq.is_reverse_complement(RNASequence(partner)))

    def test_nondegenerates_invalid(self):
        with self.assertRaises(BiologicalSequenceError):
            list(RNASequence('AZA').nondegenerates())

    def test_nondegenerates_empty(self):
        expected = [self.empty]
        self.assertEqual(list(self.empty.nondegenerates()), expected)

    def test_nondegenerates_no_degens(self):
        expected = [self.b1]
        self.assertEqual(list(self.b1.nondegenerates()), expected)

    def test_nondegenerates_all_degens(self):
        # Same chars.
        expected = list(map(RNASequence, ['CC', 'CG', 'GC', 'GG']))
        self.assertEqual(self._sorted_nondegenerates('SS'), expected)

        # Different chars.
        expected = list(map(RNASequence, ['AC', 'AG', 'GC', 'GG']))
        self.assertEqual(self._sorted_nondegenerates('RS'), expected)

        # Odd number of chars.
        count = len(list(RNASequence('NNN').nondegenerates()))
        self.assertEqual(count, 4**3)

    def test_nondegenerates_mixed_degens(self):
        expected = list(map(RNASequence, ['AGC', 'AGU', 'GGC', 'GGU']))
        self.assertEqual(self._sorted_nondegenerates('RGY'), expected)

    def test_nondegenerates_gap_mixed_case(self):
        expected = list(map(RNASequence, ['-A.a', '-A.c', '-C.a', '-C.c']))
        self.assertEqual(self._sorted_nondegenerates('-M.m'), expected)