Beispiel #1
0
    def test_refresh_primers(self):
        """Modules should land at the expected offsets when primers are set.

        A two-module helix (AAAAA / UUUUU) is embedded with positions [3, 6]
        in a 'CG' composition, with a 'UUU' 5' primer and an 'AAA' 3' primer.
        After each of 100 refreshes the rendered string is checked for both
        primers, both modules and the total number of A and U characters.
        """
        helix = Motif(
            [Module('AAAAA', '((((('), Module('UUUUU', ')))))')],
            [Rule(0, 0, 1, 4, 5)])
        constant_regions = [ConstantRegion('AAAAA'), ConstantRegion('UUUUU')]
        model = SequenceModel(
            constants=constant_regions, order='C0 - C1',
            composition=BaseFrequency('A'))
        embedder = SequenceEmbedder(
            length=30, num_to_do=100, motif=helix, model=model,
            composition=BaseFrequency('CG'), positions=[3, 6],
            primer_5='UUU', primer_3='AAA')

        previous = ''
        for _ in range(100):
            embedder.refresh()
            rendered = str(embedder)
            # (observed, expected) pairs; compared in exactly this order.
            checks = (
                (rendered[0:3], 'UUU'),       # 5' primer
                (rendered[6:11], 'AAAAA'),    # first module
                (rendered[14:19], 'UUUUU'),   # second module
                (rendered.count('A'), 8),     # 5 in the module + 3 in a primer
                (rendered.count('U'), 8),     # 5 in the module + 3 in a primer
                (rendered[-3:], 'AAA'),       # 3' primer
            )
            for observed, expected in checks:
                self.assertEqual(observed, expected)
            # each refresh is expected to produce a different sequence
            self.assertNotEqual(previous, rendered)
            previous = rendered
Beispiel #2
0
    def setUp(self):
        """Build the two Ile-motif embedder fixtures used by the tests.

        Sets self.ile_embedder and self.short_ile_embedder. Both are
        SequenceEmbedder objects with length 50, num_to_do 10 and a 'UCAG'
        composition; the second uses shorter modules and helices.
        """
        # Region order string used by both models.
        layout = "H0 C0 H1 - H1 C1 H0"

        # -- fixture with the longer helices --
        long_motif = Motif(
            [Module('NNNCUACNNNNN', '(((((..((((('),
             Module('NNNNNUAUUGGGGNNN', ')))))......)))))')],
            [Rule(0, 0, 1, 15, 5), Rule(0, 7, 1, 4, 5)])
        long_model = SequenceModel(
            helices=[PairedRegion('NNN'), PairedRegion('NNNNN')],
            constants=[ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')],
            order=layout,
            composition=BaseFrequency('UCAG'))
        self.ile_embedder = SequenceEmbedder(
            length=50, num_to_do=10, motif=long_motif, model=long_model,
            composition=BaseFrequency('UCAG'))

        # -- fixture with the shorter helices --
        short_motif = Motif(
            [Module('NCUACNN', '(((..(('),
             Module('NNUAUUGGGGN', '))......)))')],
            [Rule(0, 0, 1, 10, 3), Rule(0, 5, 1, 1, 2)])
        short_model = SequenceModel(
            helices=[PairedRegion('N'), PairedRegion('NN')],
            constants=[ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')],
            order=layout,
            composition=BaseFrequency('UCAG'))
        self.short_ile_embedder = SequenceEmbedder(
            length=50, num_to_do=10, motif=short_motif, model=short_model,
            composition=BaseFrequency('UCAG'))
    def setUp(self):
        """Create the two SequenceEmbedder fixtures shared by the tests.

        Sets self.ile_embedder and self.short_ile_embedder; the second is
        built from shorter modules and helix patterns than the first.
        """

        def build_model(helix_patterns):
            # The two models differ only in their helix patterns.
            helices = [PairedRegion(pattern) for pattern in helix_patterns]
            constants = [ConstantRegion("CUAC"), ConstantRegion("UAUUGGGG")]
            return SequenceModel(
                order="H0 C0 H1 - H1 C1 H0",
                constants=constants,
                helices=helices,
                composition=BaseFrequency("UCAG"),
            )

        def build_embedder(motif, model):
            # Both fixtures share length, iteration count and composition.
            return SequenceEmbedder(
                length=50,
                num_to_do=10,
                motif=motif,
                model=model,
                composition=BaseFrequency("UCAG"),
            )

        ile_motif = Motif(
            [
                Module("NNNCUACNNNNN", "(((((..((((("),
                Module("NNNNNUAUUGGGGNNN", ")))))......)))))"),
            ],
            [Rule(0, 0, 1, 15, 5), Rule(0, 7, 1, 4, 5)],
        )
        ile_model = build_model(["NNN", "NNNNN"])
        self.ile_embedder = build_embedder(ile_motif, ile_model)

        short_ile_motif = Motif(
            [
                Module("NCUACNN", "(((..(("),
                Module("NNUAUUGGGGN", "))......)))"),
            ],
            [Rule(0, 0, 1, 10, 3), Rule(0, 5, 1, 1, 2)],
        )
        short_ile_model = build_model(["N", "NN"])
        self.short_ile_embedder = build_embedder(short_ile_motif, short_ile_model)
    def test_refresh_primers(self):
        """Check primer and module placement across repeated refreshes.

        The embedder is built with positions [3, 6], a "UUU" 5' primer and
        an "AAA" 3' primer; every refreshed sequence must show the primers
        at both ends and the two modules at fixed slices.
        """
        motif_modules = [Module("AAAAA", "((((("), Module("UUUUU", ")))))")]
        helix = Motif(motif_modules, [Rule(0, 0, 1, 4, 5)])
        constants = [ConstantRegion("AAAAA"), ConstantRegion("UUUUU")]
        model = SequenceModel(constants=constants, order="C0 - C1", composition=BaseFrequency("A"))
        embedder = SequenceEmbedder(
            length=30,
            num_to_do=100,
            motif=helix,
            model=model,
            composition=BaseFrequency("CG"),
            positions=[3, 6],
            primer_5="UUU",
            primer_3="AAA",
        )

        # Slices of the rendered sequence whose content never changes.
        fixed_spans = (
            (slice(0, 3), "UUU"),
            (slice(6, 11), "AAAAA"),
            (slice(14, 19), "UUUUU"),
        )

        seen_last = ""
        for _ in range(100):
            embedder.refresh()
            sequence = str(embedder)
            for span, text in fixed_spans:
                self.assertEqual(sequence[span], text)
            for base in ("A", "U"):
                self.assertEqual(sequence.count(base), 8)
            self.assertEqual(sequence[-3:], "AAA")
            # consecutive refreshes should not repeat the same sequence
            self.assertNotEqual(seen_last, sequence)
            seen_last = sequence
    def test_refresh_specific_position(self):
        """Modules must stay at the requested positions on every refresh.

        With positions [3, 6] and no primers, "AAAAA" is expected at [3:8]
        and "UUUUU" at [11:16] of each refreshed sequence.
        """
        helix = Motif(
            [Module("AAAAA", "((((("), Module("UUUUU", ")))))")],
            [Rule(0, 0, 1, 4, 5)],
        )
        constants = [ConstantRegion("AAAAA"), ConstantRegion("UUUUU")]
        model = SequenceModel(constants=constants, order="C0 - C1", composition=BaseFrequency("A"))
        embedder = SequenceEmbedder(
            length=30,
            num_to_do=100,
            motif=helix,
            model=model,
            composition=BaseFrequency("CG"),
            positions=[3, 6],
        )

        previous = ""
        for _ in range(100):
            embedder.refresh()
            sequence = str(embedder)
            self.assertEqual(sequence[3:8], "AAAAA")
            self.assertEqual(sequence[11:16], "UUUUU")
            # with a "CG" composition the only A/U characters are the modules'
            for base in ("A", "U"):
                self.assertEqual(sequence.count(base), 5)
            self.assertNotEqual(previous, sequence)
            previous = sequence
Beispiel #6
0
    def test_refresh_specific_position(self):
        """Should always find the module in the same position if specified.

        Refreshes an embedder built with positions [3, 6] one hundred times
        and checks the module slices and A/U counts of each result.
        """
        stem_5 = Module('AAAAA', '(((((')
        stem_3 = Module('UUUUU', ')))))')
        helix = Motif([stem_5, stem_3], [Rule(0, 0, 1, 4, 5)])
        model = SequenceModel(
            constants=[ConstantRegion('AAAAA'), ConstantRegion('UUUUU')],
            order='C0 - C1',
            composition=BaseFrequency('A'))
        embedder = SequenceEmbedder(
            length=30, num_to_do=100, motif=helix, model=model,
            composition=BaseFrequency('CG'), positions=[3, 6])

        previous = ''
        remaining = 100
        while remaining > 0:
            remaining -= 1
            embedder.refresh()
            rendered = str(embedder)
            # (observed, expected) pairs; compared in exactly this order.
            checks = (
                (rendered[3:8], 'AAAAA'),
                (rendered[11:16], 'UUUUU'),
                (rendered.count('A'), 5),
                (rendered.count('U'), 5),
            )
            for observed, expected in checks:
                self.assertEqual(observed, expected)
            self.assertNotEqual(previous, rendered)
            previous = rendered
Beispiel #7
0
class SequenceEmbedderTests(TestCase):
    """Tests of the SequenceEmbedder class."""
    def setUp(self):
        """Define the standard embedders: self.ile_embedder and
        self.short_ile_embedder.

        Each is a SequenceEmbedder (length 50, num_to_do 10, 'UCAG'
        composition) wrapping a two-module motif and a matching model.
        """
        # Longer version: modules, pairing rules, then the sequence model.
        ile_modules = [
            Module('NNNCUACNNNNN', '(((((..((((('),
            Module('NNNNNUAUUGGGGNNN', ')))))......)))))'),
        ]
        ile_rules = [Rule(0, 0, 1, 15, 5), Rule(0, 7, 1, 4, 5)]
        ile_motif = Motif(ile_modules, ile_rules)

        ile_helices = [PairedRegion('NNN'), PairedRegion('NNNNN')]
        ile_constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
        ile_model = SequenceModel(
            order="H0 C0 H1 - H1 C1 H0", constants=ile_constants,
            helices=ile_helices, composition=BaseFrequency('UCAG'))

        self.ile_embedder = SequenceEmbedder(
            length=50, num_to_do=10, motif=ile_motif, model=ile_model,
            composition=BaseFrequency('UCAG'))

        # Shorter version: same layout with shorter modules and helices.
        short_modules = [
            Module('NCUACNN', '(((..(('),
            Module('NNUAUUGGGGN', '))......)))'),
        ]
        short_rules = [Rule(0, 0, 1, 10, 3), Rule(0, 5, 1, 1, 2)]
        short_motif = Motif(short_modules, short_rules)

        short_helices = [PairedRegion('N'), PairedRegion('NN')]
        short_constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
        short_model = SequenceModel(
            order="H0 C0 H1 - H1 C1 H0", constants=short_constants,
            helices=short_helices, composition=BaseFrequency('UCAG'))

        self.short_ile_embedder = SequenceEmbedder(
            length=50, num_to_do=10, motif=short_motif, model=short_model,
            composition=BaseFrequency('UCAG'))


    def test_composition_change(self):
        """Changes in composition should propagate.

        Assigning a new Composition to the embedder must be visible on both
        its Model and its RandomRegion, and a refreshed random region must
        then contain only bases from the new composition.
        """
        # The region is read once before the change. Checking that every
        # base of 'UCAG' occurs in it would generally hold but fails
        # stochastically, so the value is deliberately not asserted on.
        rr_before = str(self.ile_embedder.RandomRegion.Current)

        self.ile_embedder.Composition = BaseFrequency('CG')
        self.assertEqual(self.ile_embedder.Model.Composition, \
            BaseFrequency('CG'))
        self.assertEqual(self.ile_embedder.RandomRegion.Composition, \
            BaseFrequency('CG'))

        self.ile_embedder.RandomRegion.refresh()
        self.assertEqual(len(self.ile_embedder.RandomRegion), 22)
        rr = str(self.ile_embedder.RandomRegion.Current)
        # unittest assertions rather than bare asserts: they are not removed
        # under ``python -O`` and they report the offending sequence.
        self.assertTrue('C' in rr or 'G' in rr,
            "expected C or G in random region %r" % rr)
        self.assertFalse('A' in rr,
            "unexpected A in random region %r" % rr)
        self.assertFalse('U' in rr,
            "unexpected U in random region %r" % rr)

    def test_choose_locations_too_short(self):
        """SequenceEmbedder _choose_locations should fail if too little space"""
        embedder = self.ile_embedder
        # 28 leaves no positions left over, 29 leaves exactly one
        for too_short in (28, 29):
            embedder.Length = too_short
            self.assertRaises(ValueError, embedder._choose_locations)

    def test_choose_locations_exact(self):
        """SequenceEmbedder _choose_locations should pick all locations"""
        embedder = self.ile_embedder
        # two positions left: both must be filled on every draw
        embedder.Length = 30
        attempts_left = 10
        while attempts_left:
            attempts_left -= 1
            first, second = embedder._choose_locations()
            self.assertEqual(first, 0)
            self.assertEqual(second, 1)
            
    def test_choose_locations_even(self):
        """SequenceEmbedder _choose_locations should pick locations evenly

        With Length 31 there are three positions left, so 1000 draws are
        expected to be spread evenly over (0,1), (0,2) and (1,2).
        """
        self.ile_embedder.Length = 31   #three positions left
        counts = {}
        for _ in range(1000):
            key = tuple(self.ile_embedder._choose_locations())
            # unittest assertion instead of a bare assert, so the check is
            # not removed under ``python -O`` and reports both values
            self.assertNotEqual(key[0], key[1])
            counts[key] = counts.get(key, 0) + 1
        expected = [333, 333, 333]
        observed = [counts[(0,1)], counts[(0,2)], counts[(1,2)]]
        self.assertSimilarFreqs(observed, expected)
        #make sure nothing else snuck in there
        self.assertEqual(sum(observed), 1000)

    def test_choose_locations_with_replacement(self):
        """SequenceEmbedder _choose_locations can sample with replacement"""
        embedder = self.ile_embedder
        embedder.Length = 28    # exact fit: only (0, 0) is possible
        embedder.WithReplacement = True
        for _ in range(10):
            first, second = embedder._choose_locations()
            self.assertEqual(first, 0)
            self.assertEqual(second, 0)

        embedder.Length = 29    # one left over: can be 0,0 0,1 1,1
        tally = {}
        for _ in range(1000):
            picked = tuple(embedder._choose_locations())
            if picked in tally:
                tally[picked] += 1
            else:
                tally[picked] = 1
        expected = [250, 500, 250]
        outcomes = [(0,0), (0,1), (1,1)]
        observed = [tally[outcome] for outcome in outcomes]
        self.assertSimilarFreqs(observed, expected)
        # the three outcomes must account for every draw
        self.assertEqual(tally[(0,0)] + tally[(0,1)] + tally[(1,1)], 1000)
  
    def test_insert_modules(self):
        """SequenceEmbedder _insert_modules should make correct sequence"""
        embedder = self.ile_embedder
        embedder.Length = 50
        filler = 'A'
        # overwrite the random region in place with 22 filler characters
        embedder.RandomRegion.Current[:] = [filler] * 22
        parts = list(embedder.Model)

        # both modules inserted at the very first position
        embedder.Positions = [0, 0]
        rendered = str(embedder)
        self.assertEqual(rendered, parts[0] + parts[1] + filler*22)

        # modules inserted after 3 and after 20 filler characters
        embedder.Positions = [3, 20]
        rendered = str(embedder)
        self.assertEqual(
            rendered,
            filler*3 + parts[0] + filler*17 + parts[1] + filler*2)

    def test_refresh(self):
        """SequenceEmbedder refresh should change module sequences"""
        embedder = self.ile_embedder
        modules_0 = list(embedder.Model)
        random_0 = str(embedder.RandomRegion.Current)

        # first refresh: random region and every module must differ
        embedder.refresh()
        random_1 = str(embedder.RandomRegion.Current)
        self.assertNotEqual(random_0, random_1)
        modules_1 = list(embedder.Model)
        for old, new in zip(modules_0, modules_1):
            self.assertNotEqual(old, new)

        # second refresh: must differ from both earlier states
        embedder.refresh()
        random_2 = str(embedder.RandomRegion.Current)
        modules_2 = list(embedder.Model)
        self.assertNotEqual(random_2, random_0)
        self.assertNotEqual(random_2, random_1)
        for oldest, older, newest in zip(modules_0, modules_1, modules_2):
            self.assertNotEqual(oldest, newest)
            self.assertNotEqual(older, newest)

    def test_countMatches(self):
        """Shouldn't find any Ile matches if all the pairs are GU"""
        # does nothing when RNAFOLD_PRESENT is false
        if RNAFOLD_PRESENT:
            embedder = self.ile_embedder
            embedder.NumToDo = 100
            embedder.Composition = BaseFrequency('GGGGGGGGGU')
            embedder.Length = 40
            self.assertEqual(embedder.countMatches(), 0)

    def test_countMatches_pass(self):
        """Should find some matches against a random background"""
        if not RNAFOLD_PRESENT:
            # nothing to check when RNAFOLD_PRESENT is false
            return
        embedder = self.ile_embedder
        embedder.NumToDo = 100
        embedder.Composition = BaseFrequency('UCAG')
        embedder.Length = 40
        matches = embedder.countMatches()
        self.assertNotEqual(matches, 0)

    def test_refresh_specific_position(self):
        """Should always find the module in the same position if specified"""
        helix = Motif(
            [Module('AAAAA', '((((('), Module('UUUUU', ')))))')],
            [Rule(0, 0, 1, 4, 5)])
        constant_regions = [ConstantRegion('AAAAA'), ConstantRegion('UUUUU')]
        model = SequenceModel(
            constants=constant_regions, order='C0 - C1',
            composition=BaseFrequency('A'))
        embedder = SequenceEmbedder(
            length=30, num_to_do=100, motif=helix, model=model,
            composition=BaseFrequency('CG'), positions=[3, 6])

        # slices of the rendered sequence where the two modules are expected
        module_spans = ((slice(3, 8), 'AAAAA'), (slice(11, 16), 'UUUUU'))

        earlier = ''
        for _ in range(100):
            embedder.refresh()
            current = str(embedder)
            for span, module_text in module_spans:
                self.assertEqual(current[span], module_text)
            self.assertEqual(current.count('A'), 5)
            self.assertEqual(current.count('U'), 5)
            # a refresh is expected to change the sequence
            self.assertNotEqual(earlier, current)
            earlier = current
            
    def test_refresh_primers(self):
        """Module should appear in correct location with primers"""
        five_prime, three_prime = 'UUU', 'AAA'
        upstream = Module('AAAAA', '(((((')
        downstream = Module('UUUUU', ')))))')
        helix = Motif([upstream, downstream], [Rule(0, 0, 1, 4, 5)])
        model = SequenceModel(
            constants=[ConstantRegion('AAAAA'), ConstantRegion('UUUUU')],
            order='C0 - C1',
            composition=BaseFrequency('A'))
        embedder = SequenceEmbedder(
            length=30, num_to_do=100, motif=helix, model=model,
            composition=BaseFrequency('CG'), positions=[3, 6],
            primer_5=five_prime, primer_3=three_prime)

        prior = ''
        for _ in range(100):
            embedder.refresh()
            text = str(embedder)
            # primers at both ends, modules at fixed slices in between
            self.assertEqual(text[0:3], 'UUU')
            self.assertEqual(text[6:11], 'AAAAA')
            self.assertEqual(text[14:19], 'UUUUU')
            for base in ('A', 'U'):
                # 5 from a module plus 3 from a primer
                self.assertEqual(text.count(base), 8)
            self.assertEqual(text[-3:], 'AAA')
            self.assertNotEqual(prior, text)
            prior = text
 
    def xxx_test_count_long(self):
        """Print match counts for self.ile_embedder at lengths 30 to 149.

        Makes no assertions. The ``xxx_`` prefix presumably keeps it out of
        normal test collection, since the name does not start with ``test``.
        """
        num_to_do = 100000
        self.ile_embedder.NumToDo = num_to_do
        self.ile_embedder.Composition = BaseFrequency('UCAG')
        # print(...) with a single string argument gives the same output
        # under the Python 2 print statement and the Python 3 function.
        print("")
        print("Extended helices")
        for length in range(30, 150):
            self.ile_embedder.Length = length
            good_count = self.ile_embedder.countMatches()
            print("Length: %s Matches: %s/%s" % (length, good_count, num_to_do))
        print("")

    def xxx_test_count_short(self):
        """Print match counts for self.short_ile_embedder at lengths 20 to 149.

        Makes no assertions. The ``xxx_`` prefix presumably keeps it out of
        normal test collection, since the name does not start with ``test``.
        """
        num_to_do = 10000
        self.short_ile_embedder.NumToDo = num_to_do
        self.short_ile_embedder.Composition = BaseFrequency('UCAG')
        # print(...) with a single string argument gives the same output
        # under the Python 2 print statement and the Python 3 function.
        print("")
        print("Minimal motif")
        for length in range(20, 150):
            self.short_ile_embedder.Length = length
            good_count = self.short_ile_embedder.countMatches()
            print("Length: %s Matches: %s/%s" % (length, good_count, num_to_do))
        print("")