def test_refresh_primers(self):
    """Modules should keep their expected offsets when primers are set.

    Embeds a two-module AAAAA/UUUUU helix at positions [3, 6] in a
    length-30 embedder with a 'UUU' 5' primer and an 'AAA' 3' primer,
    then checks the string form of the embedder after each of 100
    refreshes.
    """
    helix = Motif([Module('AAAAA', '((((('), Module('UUUUU', ')))))')],
                  [Rule(0, 0, 1, 4, 5)])
    constants = [ConstantRegion('AAAAA'), ConstantRegion('UUUUU')]
    model = SequenceModel(constants=constants, order='C0 - C1',
                          composition=BaseFrequency('A'))
    embedder = SequenceEmbedder(length=30, num_to_do=100, motif=helix,
                                model=model,
                                composition=BaseFrequency('CG'),
                                positions=[3, 6],
                                primer_5='UUU', primer_3='AAA')

    previous = ''
    for _ in range(100):
        embedder.refresh()
        current = str(embedder)
        # (observed, expected) pairs, checked in this order so the first
        # mismatch is the one reported.
        expectations = [
            (current[0:3], 'UUU'),       # 5' primer at the start
            (current[6:11], 'AAAAA'),    # first module
            (current[14:19], 'UUUUU'),   # second module
            (current.count('A'), 8),     # 5 in the module + 3 in 'AAA'
            (current.count('U'), 8),     # 5 in the module + 3 in 'UUU'
            (current[-3:], 'AAA'),       # 3' primer at the end
        ]
        for observed, expected in expectations:
            self.assertEqual(observed, expected)
        # Consecutive refreshes are expected to produce different strings.
        self.assertNotEqual(previous, current)
        previous = current
def setUp(self):
    """Create the two embedders shared by the tests in this class.

    ``self.ile_embedder`` uses the motif with 3- and 5-base paired
    regions; ``self.short_ile_embedder`` uses the shortened variant with
    1- and 2-base paired regions. Both are built with length=50,
    num_to_do=10 and a 'UCAG' base composition.
    """
    # Both models use the same region ordering string.
    layout = "H0 C0 H1 - H1 C1 H0"

    # --- extended-helix motif ---------------------------------------
    long_modules = [Module('NNNCUACNNNNN', '(((((..((((('),
                    Module('NNNNNUAUUGGGGNNN', ')))))......)))))')]
    long_rules = [Rule(0, 0, 1, 15, 5), Rule(0, 7, 1, 4, 5)]
    long_motif = Motif(long_modules, long_rules)
    long_helices = [PairedRegion('NNN'), PairedRegion('NNNNN')]
    long_constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
    long_model = SequenceModel(order=layout, constants=long_constants,
                               helices=long_helices,
                               composition=BaseFrequency('UCAG'))
    self.ile_embedder = SequenceEmbedder(
        length=50, num_to_do=10, motif=long_motif, model=long_model,
        composition=BaseFrequency('UCAG'))

    # --- minimal motif ----------------------------------------------
    short_modules = [Module('NCUACNN', '(((..(('),
                     Module('NNUAUUGGGGN', '))......)))')]
    short_rules = [Rule(0, 0, 1, 10, 3), Rule(0, 5, 1, 1, 2)]
    short_motif = Motif(short_modules, short_rules)
    short_helices = [PairedRegion('N'), PairedRegion('NN')]
    short_constants = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
    short_model = SequenceModel(order=layout, constants=short_constants,
                                helices=short_helices,
                                composition=BaseFrequency('UCAG'))
    self.short_ile_embedder = SequenceEmbedder(
        length=50, num_to_do=10, motif=short_motif, model=short_model,
        composition=BaseFrequency('UCAG'))
def test_composition_change(self):
    """Changes in composition should propagate.

    Assigning ``Composition`` on the embedder must be visible on both its
    ``Model`` and its ``RandomRegion``; after refreshing, the random
    region must contain only bases from the new composition.
    """
    embedder = self.ile_embedder

    # Read the initial random region. It should generally contain every
    # base in 'UCAG', but that is deliberately not asserted because the
    # check fails stochastically.
    rr = str(embedder.RandomRegion.Current)

    embedder.Composition = BaseFrequency('CG')
    self.assertEqual(embedder.Model.Composition, BaseFrequency('CG'))
    self.assertEqual(embedder.RandomRegion.Composition,
                     BaseFrequency('CG'))

    embedder.RandomRegion.refresh()
    self.assertEqual(len(embedder.RandomRegion), 22)

    rr = str(embedder.RandomRegion.Current)
    # Use TestCase assertions rather than bare ``assert`` so the checks
    # still run under ``python -O`` and report the offending sequence.
    self.assertTrue('C' in rr or 'G' in rr,
                    "expected C or G in random region %r" % rr)
    self.assertFalse('A' in rr,
                     "unexpected A in random region %r" % rr)
    self.assertFalse('U' in rr,
                     "unexpected U in random region %r" % rr)
def test_countMatches_pass(self):
    """Should find some matches against a random background.

    Does nothing (and therefore passes) when RNAFOLD_PRESENT is false.
    """
    if RNAFOLD_PRESENT:
        embedder = self.ile_embedder
        embedder.NumToDo = 100
        embedder.Composition = BaseFrequency('UCAG')
        embedder.Length = 40
        matches = embedder.countMatches()
        # At least one of the 100 attempts is expected to match.
        self.assertNotEqual(matches, 0)
def test_countMatches(self):
    """Shouldn't find any Ile matches if all the pairs are GU.

    Does nothing (and therefore passes) when RNAFOLD_PRESENT is false.
    """
    if RNAFOLD_PRESENT:
        embedder = self.ile_embedder
        embedder.NumToDo = 100
        # Composition built from nine G's and one U.
        embedder.Composition = BaseFrequency('GGGGGGGGGU')
        embedder.Length = 40
        matches = embedder.countMatches()
        self.assertEqual(matches, 0)
def xxx_test_count_short(self):
    """Print match counts for the minimal motif at lengths 20 through 149.

    The method name does not start with ``test``, so default unittest
    collection will not run it; it has to be invoked by hand. It makes
    no assertions and only reports ``countMatches()`` for each length.
    """
    num_to_do = 10000
    embedder = self.short_ile_embedder
    embedder.NumToDo = num_to_do
    embedder.Composition = BaseFrequency('UCAG')
    # Single-argument parenthesised prints produce the same output under
    # Python 2 (print statement) and Python 3 (print function);
    # print("") emits the blank line the bare ``print`` statement did.
    print("")
    print("Minimal motif")
    for length in range(20, 150):
        embedder.Length = length
        good_count = embedder.countMatches()
        print("Length: %s Matches: %s/%s" % (length, good_count, num_to_do))
    print("")
def xxx_test_count_long(self):
    """Print match counts for the extended-helix motif, lengths 30 to 149.

    The method name does not start with ``test``, so default unittest
    collection will not run it; it has to be invoked by hand. It makes
    no assertions and only reports ``countMatches()`` for each length.
    """
    num_to_do = 100000
    embedder = self.ile_embedder
    embedder.NumToDo = num_to_do
    embedder.Composition = BaseFrequency('UCAG')
    # Single-argument parenthesised prints produce the same output under
    # Python 2 (print statement) and Python 3 (print function);
    # print("") emits the blank line the bare ``print`` statement did.
    print("")
    print("Extended helices")
    for length in range(30, 150):
        embedder.Length = length
        good_count = embedder.countMatches()
        print("Length: %s Matches: %s/%s" % (length, good_count, num_to_do))
    print("")
def test_init(self):
    """SequenceModel should init OK with Isoleucine motif.

    Smoke test only: the model is built and refreshed repeatedly, first
    as constructed and then with ``GU`` set to False. It passes as long
    as nothing raises.
    """
    paired = [PairedRegion('NNN'), PairedRegion('NNNNN')]
    fixed = [ConstantRegion('CUAC'), ConstantRegion('UAUUGGGG')]
    layout = "H0 C0 H1 - H1 C1 H0"
    isoleucine = SequenceModel(order=layout, constants=fixed,
                               helices=paired)

    isoleucine.Composition = BaseFrequency('UCAG')
    for _ in range(10):
        isoleucine.refresh()

    # Re-apply the composition, switch GU off, and refresh again.
    isoleucine.Composition = BaseFrequency('UCAG')
    isoleucine.GU = False
    for _ in range(10):
        isoleucine.refresh()
def test_refresh_specific_position(self):
    """Should always find the module in the same position if specified.

    Embeds a two-module AAAAA/UUUUU helix at positions [3, 6] in a
    length-30 embedder with a 'CG' composition and checks the string
    form of the embedder after each of 100 refreshes.
    """
    helix = Motif([Module('AAAAA', '((((('), Module('UUUUU', ')))))')],
                  [Rule(0, 0, 1, 4, 5)])
    constants = [ConstantRegion('AAAAA'), ConstantRegion('UUUUU')]
    model = SequenceModel(constants=constants, order='C0 - C1',
                          composition=BaseFrequency('A'))
    embedder = SequenceEmbedder(length=30, num_to_do=100, motif=helix,
                                model=model,
                                composition=BaseFrequency('CG'),
                                positions=[3, 6])

    previous = ''
    for _ in range(100):
        embedder.refresh()
        current = str(embedder)
        # Both modules must sit at fixed offsets on every refresh.
        self.assertEqual(current[3:8], 'AAAAA')
        self.assertEqual(current[11:16], 'UUUUU')
        # Exactly five of each: A and U occur only inside the modules.
        self.assertEqual(current.count('A'), 5)
        self.assertEqual(current.count('U'), 5)
        # Consecutive refreshes are expected to produce different strings.
        self.assertNotEqual(previous, current)
        previous = current
def test_init(self):
    """BaseFrequency should init as expected.

    The default call is compared with ``Freqs`` given 'UCAG' as its
    second argument; the ``RNA=False`` call is compared with ``Freqs``
    built from the sequence alone.
    """
    rna_observed = BaseFrequency('UUUCCCCAG')
    rna_expected = Freqs('UUUCCCCAG', 'UCAG')
    self.assertEqual(rna_observed, rna_expected)

    dna_observed = BaseFrequency('TTTCAGG', RNA=False)
    dna_expected = Freqs('TTTCAGG')
    self.assertEqual(dna_observed, dna_expected)