def test_refresh_primers(self):
    """Check module placement when 5' and 3' primers flank the sequence.

    Builds a two-module helix motif (AAAAA / UUUUU) and an embedder with
    positions [3, 6] plus primers 'UUU' (5') and 'AAA' (3'), then refreshes
    it 100 times.  After every refresh the string form must start with the
    5' primer, end with the 3' primer, carry the two modules at offsets 6
    and 14, contain exactly eight A's and eight U's, and differ from the
    string produced by the previous refresh.
    """
    module_a = Module('AAAAA', '(((((')
    module_u = Module('UUUUU', ')))))')
    pairing = Rule(0, 0, 1, 4, 5)
    helix = Motif([module_a, module_u], [pairing])

    constant_regions = [ConstantRegion('AAAAA'), ConstantRegion('UUUUU')]
    model = SequenceModel(
        constants=constant_regions,
        order='C0 - C1',
        composition=BaseFrequency('A'),
    )
    embedder = SequenceEmbedder(
        length=30,
        num_to_do=100,
        motif=helix,
        model=model,
        composition=BaseFrequency('CG'),
        positions=[3, 6],
        primer_5='UUU',
        primer_3='AAA',
    )

    previous = ''
    for _ in range(100):
        embedder.refresh()
        current = str(embedder)
        # Primer and module locations are fixed on every refresh.
        self.assertEqual(current[0:3], 'UUU')
        self.assertEqual(current[6:11], 'AAAAA')
        self.assertEqual(current[14:19], 'UUUUU')
        # Five bases from each module plus three from a primer.
        self.assertEqual(current.count('A'), 8)
        self.assertEqual(current.count('U'), 8)
        self.assertEqual(current[-3:], 'AAA')
        # The remaining (random) part must change between refreshes.
        self.assertNotEqual(previous, current)
        previous = current
def setUp(self):
    """Build the two embedders shared by the tests.

    ``self.ile_embedder`` uses the motif with the longer flanking helices
    and ``self.short_ile_embedder`` uses the shortened one; both are created
    with length 50, ``num_to_do`` 10 and a 'UCAG' base composition.
    """
    # --- motif with extended helices --------------------------------------
    long_modules = [
        Module('NNNCUACNNNNN', '(((((..((((('),
        Module('NNNNNUAUUGGGGNNN', ')))))......)))))'),
    ]
    long_rules = [Rule(0, 0, 1, 15, 5), Rule(0, 7, 1, 4, 5)]
    long_motif = Motif(long_modules, long_rules)

    long_helices = [PairedRegion('NNN'), PairedRegion('NNNNN')]
    long_constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
    long_model = SequenceModel(
        order="H0 C0 H1 - H1 C1 H0",
        constants=long_constants,
        helices=long_helices,
        composition=BaseFrequency('UCAG'),
    )
    self.ile_embedder = SequenceEmbedder(
        length=50,
        num_to_do=10,
        motif=long_motif,
        model=long_model,
        composition=BaseFrequency('UCAG'),
    )

    # --- motif with shortened helices -------------------------------------
    brief_modules = [
        Module('NCUACNN', '(((..(('),
        Module('NNUAUUGGGGN', '))......)))'),
    ]
    brief_rules = [Rule(0, 0, 1, 10, 3), Rule(0, 5, 1, 1, 2)]
    brief_motif = Motif(brief_modules, brief_rules)

    brief_helices = [PairedRegion('N'), PairedRegion('NN')]
    brief_constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
    brief_model = SequenceModel(
        order="H0 C0 H1 - H1 C1 H0",
        constants=brief_constants,
        helices=brief_helices,
        composition=BaseFrequency('UCAG'),
    )
    self.short_ile_embedder = SequenceEmbedder(
        length=50,
        num_to_do=10,
        motif=brief_motif,
        model=brief_model,
        composition=BaseFrequency('UCAG'),
    )
def setUp(self):
    """Create the standard and the shortened embedder fixtures.

    Both fixtures share the same region order, the same two constant
    regions, a "UCAG" composition, length 50 and ``num_to_do`` 10; they
    differ only in their modules, pairing rules and helix patterns.
    """
    region_order = "H0 C0 H1 - H1 C1 H0"

    def make_embedder(module_specs, rule_specs, helix_patterns):
        # Construction order mirrors the straight-line version: modules,
        # rules, motif, helices, constants, model, embedder.
        modules = [Module(sequence, structure) for sequence, structure in module_specs]
        rules = [Rule(*spec) for spec in rule_specs]
        motif = Motif(modules, rules)
        helices = [PairedRegion(pattern) for pattern in helix_patterns]
        constants = [ConstantRegion("CUAC"), ConstantRegion("UAUUGGGG")]
        model = SequenceModel(
            order=region_order,
            constants=constants,
            helices=helices,
            composition=BaseFrequency("UCAG"),
        )
        return SequenceEmbedder(
            length=50,
            num_to_do=10,
            motif=motif,
            model=model,
            composition=BaseFrequency("UCAG"),
        )

    self.ile_embedder = make_embedder(
        module_specs=[
            ("NNNCUACNNNNN", "(((((..((((("),
            ("NNNNNUAUUGGGGNNN", ")))))......)))))"),
        ],
        rule_specs=[(0, 0, 1, 15, 5), (0, 7, 1, 4, 5)],
        helix_patterns=["NNN", "NNNNN"],
    )
    self.short_ile_embedder = make_embedder(
        module_specs=[
            ("NCUACNN", "(((..(("),
            ("NNUAUUGGGGN", "))......)))"),
        ],
        rule_specs=[(0, 0, 1, 10, 3), (0, 5, 1, 1, 2)],
        helix_patterns=["N", "NN"],
    )
def test_refresh_primers(self):
    """Primers must not disturb where the embedded modules end up.

    The embedder is built with positions [3, 6], a "UUU" 5' primer and an
    "AAA" 3' primer.  Each of 100 refreshes must yield a string with the
    fixed spans listed in ``fixed_spans``, eight A's, eight U's, the 3'
    primer at the end, and content different from the previous refresh.
    """
    helix = Motif(
        [Module("AAAAA", "((((("), Module("UUUUU", ")))))")],
        [Rule(0, 0, 1, 4, 5)],
    )
    constant_model = SequenceModel(
        constants=[ConstantRegion("AAAAA"), ConstantRegion("UUUUU")],
        order="C0 - C1",
        composition=BaseFrequency("A"),
    )
    embedder = SequenceEmbedder(
        length=30,
        num_to_do=100,
        motif=helix,
        model=constant_model,
        composition=BaseFrequency("CG"),
        positions=[3, 6],
        primer_5="UUU",
        primer_3="AAA",
    )

    # (start, stop, expected text) for the 5' primer and the two modules.
    fixed_spans = ((0, 3, "UUU"), (6, 11, "AAAAA"), (14, 19, "UUUUU"))

    previous = ""
    for _ in range(100):
        embedder.refresh()
        rendered = str(embedder)
        for start, stop, expected in fixed_spans:
            self.assertEqual(rendered[start:stop], expected)
        for base in "AU":
            self.assertEqual(rendered.count(base), 8)
        self.assertEqual(rendered[-3:], "AAA")
        self.assertNotEqual(previous, rendered)
        previous = rendered
def test_refresh_specific_position(self):
    """Modules stay at the requested positions across refreshes.

    With positions [3, 6] and no primers, each of 100 refreshes must place
    "AAAAA" at 3:8 and "UUUUU" at 11:16, contain exactly five A's and five
    U's, and produce a string different from the previous refresh.
    """
    helix = Motif(
        [Module("AAAAA", "((((("), Module("UUUUU", ")))))")],
        [Rule(0, 0, 1, 4, 5)],
    )
    constant_model = SequenceModel(
        constants=[ConstantRegion("AAAAA"), ConstantRegion("UUUUU")],
        order="C0 - C1",
        composition=BaseFrequency("A"),
    )
    embedder = SequenceEmbedder(
        length=30,
        num_to_do=100,
        motif=helix,
        model=constant_model,
        composition=BaseFrequency("CG"),
        positions=[3, 6],
    )

    # (start, stop, expected text) for the two embedded modules.
    module_spans = ((3, 8, "AAAAA"), (11, 16, "UUUUU"))

    previous = ""
    for _ in range(100):
        embedder.refresh()
        rendered = str(embedder)
        for start, stop, expected in module_spans:
            self.assertEqual(rendered[start:stop], expected)
        # A and U occur only inside the modules.
        for base in "AU":
            self.assertEqual(rendered.count(base), 5)
        self.assertNotEqual(previous, rendered)
        previous = rendered
def test_refresh_specific_position(self):
    """Check that explicitly given positions are honoured on every refresh.

    The embedder is created with positions [3, 6]; over 100 refreshes the
    string form must always show 'AAAAA' at 3:8 and 'UUUUU' at 11:16, hold
    exactly five A's and five U's, and never repeat the preceding string.
    """
    module_a = Module('AAAAA', '(((((')
    module_u = Module('UUUUU', ')))))')
    pairing = Rule(0, 0, 1, 4, 5)
    helix = Motif([module_a, module_u], [pairing])

    constant_regions = [ConstantRegion('AAAAA'), ConstantRegion('UUUUU')]
    model = SequenceModel(
        constants=constant_regions,
        order='C0 - C1',
        composition=BaseFrequency('A'),
    )
    embedder = SequenceEmbedder(
        length=30,
        num_to_do=100,
        motif=helix,
        model=model,
        composition=BaseFrequency('CG'),
        positions=[3, 6],
    )

    previous = ''
    for _ in range(100):
        embedder.refresh()
        current = str(embedder)
        self.assertEqual(current[3:8], 'AAAAA')
        self.assertEqual(current[11:16], 'UUUUU')
        # The modules are the only source of A and U in the sequence.
        self.assertEqual(current.count('A'), 5)
        self.assertEqual(current.count('U'), 5)
        # Everything else is random and should change between refreshes.
        self.assertNotEqual(previous, current)
        previous = current
class SequenceEmbedderTests(TestCase):
    """Tests of the SequenceEmbedder class."""

    def setUp(self):
        """Define a few standard models and motifs.

        Creates ``self.ile_embedder`` (motif with extended helices) and
        ``self.short_ile_embedder`` (motif with shortened helices), both with
        length 50, ``num_to_do`` 10 and a 'UCAG' composition.
        """
        ile_mod_0 = Module('NNNCUACNNNNN', '(((((..(((((')
        ile_mod_1 = Module('NNNNNUAUUGGGGNNN', ')))))......)))))')
        ile_rule_0 = Rule(0, 0, 1, 15, 5)
        ile_rule_1 = Rule(0, 7, 1, 4, 5)
        ile_motif = Motif([ile_mod_0, ile_mod_1], [ile_rule_0, ile_rule_1])
        helices = [PairedRegion('NNN'), PairedRegion('NNNNN')]
        constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
        order = "H0 C0 H1 - H1 C1 H0"
        ile_model = SequenceModel(order=order, constants=constants,
                                  helices=helices,
                                  composition=BaseFrequency('UCAG'))
        self.ile_embedder = SequenceEmbedder(
            length=50, num_to_do=10, motif=ile_motif, model=ile_model,
            composition=BaseFrequency('UCAG'))

        short_ile_mod_0 = Module('NCUACNN', '(((..((')
        short_ile_mod_1 = Module('NNUAUUGGGGN', '))......)))')
        short_ile_rule_0 = Rule(0, 0, 1, 10, 3)
        short_ile_rule_1 = Rule(0, 5, 1, 1, 2)
        short_ile_motif = Motif([short_ile_mod_0, short_ile_mod_1],
                                [short_ile_rule_0, short_ile_rule_1])
        short_helices = [PairedRegion('N'), PairedRegion('NN')]
        short_constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
        short_order = "H0 C0 H1 - H1 C1 H0"
        short_ile_model = SequenceModel(order=short_order,
                                        constants=short_constants,
                                        helices=short_helices,
                                        composition=BaseFrequency('UCAG'))
        self.short_ile_embedder = SequenceEmbedder(
            length=50, num_to_do=10, motif=short_ile_motif,
            model=short_ile_model, composition=BaseFrequency('UCAG'))

    def test_composition_change(self):
        """Changes in composition should propagate."""
        # Snapshot of the initial random region.  Deliberately not asserted
        # on: all four bases of 'UCAG' are usually present, but a check for
        # that fails stochastically.
        rr = str(self.ile_embedder.RandomRegion.Current)
        self.ile_embedder.Composition = BaseFrequency('CG')
        self.assertEqual(self.ile_embedder.Model.Composition,
                         BaseFrequency('CG'))
        self.assertEqual(self.ile_embedder.RandomRegion.Composition,
                         BaseFrequency('CG'))
        self.ile_embedder.RandomRegion.refresh()
        self.assertEqual(len(self.ile_embedder.RandomRegion), 22)
        rr = str(self.ile_embedder.RandomRegion.Current)
        # unittest assertions instead of bare asserts, so the checks still
        # run when Python is started with -O.
        self.assertTrue('C' in rr or 'G' in rr)
        self.assertTrue('A' not in rr)
        self.assertTrue('U' not in rr)

    def test_choose_locations_too_short(self):
        """SequenceEmbedder _choose_locations should fail if too little space"""
        self.ile_embedder.Length = 28   # no positions left over
        self.assertRaises(ValueError, self.ile_embedder._choose_locations)
        self.ile_embedder.Length = 29   # one position left over
        self.assertRaises(ValueError, self.ile_embedder._choose_locations)

    def test_choose_locations_exact(self):
        """SequenceEmbedder _choose_locations should pick all locations"""
        self.ile_embedder.Length = 30   # two positions left: must both be filled
        for _ in range(10):
            first, second = self.ile_embedder._choose_locations()
            self.assertEqual(first, 0)
            self.assertEqual(second, 1)

    def test_choose_locations_even(self):
        """SequenceEmbedder _choose_locations should pick locations evenly"""
        self.ile_embedder.Length = 31   # three positions left
        counts = {}
        for _ in range(1000):
            key = tuple(self.ile_embedder._choose_locations())
            # Without replacement the two locations must differ.
            self.assertNotEqual(key[0], key[1])
            counts[key] = counts.get(key, 0) + 1
        expected = [333, 333, 333]
        observed = [counts[(0, 1)], counts[(0, 2)], counts[(1, 2)]]
        self.assertSimilarFreqs(observed, expected)
        # make sure nothing else snuck in there
        self.assertEqual(
            counts[(0, 1)] + counts[(0, 2)] + counts[(1, 2)], 1000)

    def test_choose_locations_with_replacement(self):
        """SequenceEmbedder _choose_locations can sample with replacement"""
        self.ile_embedder.Length = 28   # exact fit
        self.ile_embedder.WithReplacement = True
        for _ in range(10):
            first, second = self.ile_embedder._choose_locations()
            self.assertEqual(first, 0)
            self.assertEqual(second, 0)
        self.ile_embedder.Length = 29   # one left over: can be 0,0 0,1 1,1
        counts = {}
        for _ in range(1000):
            key = tuple(self.ile_embedder._choose_locations())
            counts[key] = counts.get(key, 0) + 1
        expected = [250, 500, 250]
        observed = [counts[(0, 0)], counts[(0, 1)], counts[(1, 1)]]
        self.assertSimilarFreqs(observed, expected)
        # make sure nothing else snuck in there
        self.assertEqual(
            counts[(0, 0)] + counts[(0, 1)] + counts[(1, 1)], 1000)

    def test_insert_modules(self):
        """SequenceEmbedder _insert_modules should make correct sequence"""
        ile = self.ile_embedder
        ile.Length = 50
        # Fill the random region with a known base so the expected string
        # can be written out literally.
        ile.RandomRegion.Current[:] = ['A'] * 22
        modules = list(ile.Model)
        ile.Positions = [0, 0]  # try inserting at first position
        self.assertEqual(str(ile), modules[0] + modules[1] + 'A' * 22)
        ile.Positions = [3, 20]
        self.assertEqual(
            str(ile),
            'A' * 3 + modules[0] + 'A' * 17 + modules[1] + 'A' * 2)

    def test_refresh(self):
        """SequenceEmbedder refresh should change module sequences"""
        modules_before = list(self.ile_embedder.Model)
        random_before = str(self.ile_embedder.RandomRegion.Current)
        self.ile_embedder.refresh()
        random_after = str(self.ile_embedder.RandomRegion.Current)
        self.assertNotEqual(random_before, random_after)
        modules_after = list(self.ile_embedder.Model)
        for before, after in zip(modules_before, modules_after):
            self.assertNotEqual(before, after)
        # check that it works twice
        self.ile_embedder.refresh()
        random_third = str(self.ile_embedder.RandomRegion.Current)
        modules_third = list(self.ile_embedder.Model)
        self.assertNotEqual(random_third, random_before)
        self.assertNotEqual(random_third, random_after)
        for first, second, third in zip(modules_before, modules_after,
                                        modules_third):
            self.assertNotEqual(first, third)
            self.assertNotEqual(second, third)

    def test_countMatches(self):
        """Shouldn't find any Ile matches if all the pairs are GU"""
        # Returns early (test reported as passed) when RNAFOLD_PRESENT is
        # false.
        if not RNAFOLD_PRESENT:
            return
        self.ile_embedder.NumToDo = 100
        self.ile_embedder.Composition = BaseFrequency('GGGGGGGGGU')
        self.ile_embedder.Length = 40
        good_count = self.ile_embedder.countMatches()
        self.assertEqual(good_count, 0)

    def test_countMatches_pass(self):
        """Should find some matches against a random background"""
        # Returns early (test reported as passed) when RNAFOLD_PRESENT is
        # false.
        if not RNAFOLD_PRESENT:
            return
        self.ile_embedder.NumToDo = 100
        self.ile_embedder.Composition = BaseFrequency('UCAG')
        self.ile_embedder.Length = 40
        good_count = self.ile_embedder.countMatches()
        self.assertNotEqual(good_count, 0)

    def test_refresh_specific_position(self):
        """Should always find the module in the same position if specified"""
        first_module = Module('AAAAA', '(((((')
        second_module = Module('UUUUU', ')))))')
        rule_1 = Rule(0, 0, 1, 4, 5)
        helix = Motif([first_module, second_module], [rule_1])
        model = SequenceModel(
            constants=[ConstantRegion('AAAAA'), ConstantRegion('UUUUU')],
            order='C0 - C1', composition=BaseFrequency('A'))
        embedder = SequenceEmbedder(
            length=30, num_to_do=100, motif=helix, model=model,
            composition=BaseFrequency('CG'), positions=[3, 6])
        last = ''
        for _ in range(100):
            embedder.refresh()
            curr = str(embedder)
            self.assertEqual(curr[3:8], 'AAAAA')
            self.assertEqual(curr[11:16], 'UUUUU')
            self.assertEqual(curr.count('A'), 5)
            self.assertEqual(curr.count('U'), 5)
            self.assertNotEqual(last, curr)
            last = curr

    def test_refresh_primers(self):
        """Module should appear in correct location with primers"""
        first_module = Module('AAAAA', '(((((')
        second_module = Module('UUUUU', ')))))')
        rule_1 = Rule(0, 0, 1, 4, 5)
        helix = Motif([first_module, second_module], [rule_1])
        model = SequenceModel(
            constants=[ConstantRegion('AAAAA'), ConstantRegion('UUUUU')],
            order='C0 - C1', composition=BaseFrequency('A'))
        embedder = SequenceEmbedder(
            length=30, num_to_do=100, motif=helix, model=model,
            composition=BaseFrequency('CG'), positions=[3, 6],
            primer_5='UUU', primer_3='AAA')
        last = ''
        for _ in range(100):
            embedder.refresh()
            curr = str(embedder)
            self.assertEqual(curr[0:3], 'UUU')
            self.assertEqual(curr[6:11], 'AAAAA')
            self.assertEqual(curr[14:19], 'UUUUU')
            self.assertEqual(curr.count('A'), 8)
            self.assertEqual(curr.count('U'), 8)
            self.assertEqual(curr[-3:], 'AAA')
            self.assertNotEqual(last, curr)
            last = curr

    def xxx_test_count_long(self):
        """Print match counts for the extended-helix motif, lengths 30-149.

        Not an assertion-based test: it only prints results.  The name does
        not start with ``test``, so the default unittest loader does not
        collect it; call it by hand when the numbers are wanted.
        """
        self.ile_embedder.NumToDo = 100000
        self.ile_embedder.Composition = BaseFrequency('UCAG')
        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print('')
        print("Extended helices")
        for length in range(30, 150):
            self.ile_embedder.Length = length
            good_count = self.ile_embedder.countMatches()
            print("Length: %s Matches: %s/100000" % (length, good_count))
        print('')

    def xxx_test_count_short(self):
        """Print match counts for the minimal motif, lengths 20-149.

        Not an assertion-based test: it only prints results.  The name does
        not start with ``test``, so the default unittest loader does not
        collect it; call it by hand when the numbers are wanted.
        """
        self.short_ile_embedder.NumToDo = 10000
        self.short_ile_embedder.Composition = BaseFrequency('UCAG')
        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print('')
        print("Minimal motif")
        for length in range(20, 150):
            self.short_ile_embedder.Length = length
            good_count = self.short_ile_embedder.countMatches()
            print("Length: %s Matches: %s/10000" % (length, good_count))
        print('')