def featspacelen(self):
    """Build simulated tasks for one feature-space size and sequence length.

    Reads ``featspace``, ``seqlen`` and ``run_brute`` from ``self.kwdargs``.
    For each of ``self.ntimes`` repetitions a fresh HMM is configured through
    ``_set_params_generic``, wrapped in a CMRF and paired with features from
    ``_gen_feats_generic``.  The Pareto frontier is always computed; the
    brute-force enumeration only runs when ``run_brute`` is truthy.

    Side effects: replaces ``self.tasklist`` with the newly built tasks.
    """
    self.tasklist = []
    featspace = self.kwdargs['featspace']
    seqspace = 20
    seqlen = self.kwdargs['seqlen']
    dims = [(seqspace, featspace)] * seqlen

    # Repeat for all the tasks described
    for taskid in range(self.ntimes):
        hmm = HMM()
        self._set_params_generic(hmm, seqlen, dims)
        cmrf = CMRF(hmm)
        feats = self._gen_feats_generic(seqlen, featspace)

        # str(STUDY) keeps the name building consistent with ``ziftied`` and
        # avoids a TypeError should STUDY not be a string.
        task_name = '_'.join(['sim' + str(STUDY), self.name, str(seqlen),
                              str(featspace), str(taskid)])
        task = Task(task_name, cmrf, feats)

        # NOTE(review): the original indentation was lost; only the measured
        # call is assumed to sit inside each ``with benchmark`` block, and
        # ``t.elapsed`` is read after the block exits -- confirm.

        # Run brute force to enumerate the frontier
        if self.kwdargs['run_brute']:
            with benchmark(task.name + 'brute') as t:
                seq, energies = self.bruteforce(cmrf, feats)
            task.all_seq = seq
            task.all_seq_energy = energies
            task.brute_time = t.elapsed

        # Now compute the Pareto frontier
        with benchmark(task.name + 'pareto') as t:
            task.frontier, task.frontier_energy = pareto_frontier(cmrf, feats)
        if self.plot_all:
            task.plot_frontier(frontier_only=True)
        task.pareto_time = t.elapsed

        self.tasklist.append(task)
def ziftied(self):
    """Build the 'ziftied' tasks: sample sequences, then find the frontier.

    Reads ``feats`` and ``weights`` from ``self.kwdargs`` (``weights`` is
    looked up but not used in this method).  A single HMM, configured by
    ``_set_params_ziftied``, and its CMRF are shared by all ``self.ntimes``
    tasks.  Each task records the output of ``self.sample`` and of
    ``pareto_frontier`` together with the benchmark timings.

    Side effects: replaces ``self.tasklist`` with the newly built tasks.
    """
    self.tasklist = []
    feats = self.kwdargs['feats']
    weights = self.kwdargs['weights']  # unused here; lookup kept as in the original

    model = HMM()
    self._set_params_ziftied(model)
    cmrf = CMRF(model)

    for taskid in range(self.ntimes):
        label = '_'.join(['bio' + str(STUDY), self.name, str(taskid)])
        task = Task(label, cmrf, feats)

        # Brute-force enumeration is intentionally not run for this study.

        # NOTE(review): the original indentation was lost; only the measured
        # call is assumed to sit inside each ``with benchmark`` block, and
        # ``elapsed`` is read after the block exits -- confirm.

        # Sample the frontier
        with benchmark(task.name + 'sample') as timer:
            sampled, sampled_energy = self.sample(cmrf, feats)
        task.sample_seq = sampled
        task.sample_seq_energy = sampled_energy
        task.sample_time = timer.elapsed

        # Compute the Pareto frontier
        with benchmark(task.name + 'pareto') as timer:
            front, front_energy = pareto_frontier(cmrf, feats)
            task.frontier, task.frontier_energy = front, front_energy
        if self.plot_all:
            task.plot_frontier(frontier_only=True, plot_samples=True)
        task.pareto_time = timer.elapsed

        self.tasklist.append(task)
def toy(self):
    """Set up the toy simulation tasks.

    One HMM, configured by ``_set_params_toy``, and its CMRF are shared by
    all ``self.ntimes`` tasks, together with the features returned by
    ``get_feats_standard``.  Every task stores the brute-force enumeration
    and the Pareto frontier along with the benchmark timings.

    Side effects: replaces ``self.tasklist`` with the newly built tasks.
    """
    self.tasklist = []
    feats = self.get_feats_standard()
    hmm = HMM()
    self._set_params_toy(hmm)
    cmrf = CMRF(hmm)

    for taskid in range(self.ntimes):
        # str(STUDY) keeps the name building consistent with ``ziftied`` and
        # avoids a TypeError should STUDY not be a string.
        task_name = '_'.join(['sim' + str(STUDY), self.name, str(taskid)])
        task = Task(task_name, cmrf, feats)

        # NOTE(review): the original indentation was lost; only the measured
        # call is assumed to sit inside each ``with benchmark`` block, and
        # ``t.elapsed`` is read after the block exits -- confirm.

        # Run brute force to enumerate the frontier
        with benchmark(task.name + 'brute') as t:
            seq, energies = self.bruteforce(cmrf, feats)
        task.all_seq = seq
        task.all_seq_energy = energies
        task.brute_time = t.elapsed

        # Now run the toy simulation
        with benchmark(task.name + 'pareto') as t:
            task.frontier, task.frontier_energy = pareto_frontier(cmrf, feats)
        if self.plot_all:
            task.plot_frontier()
        task.pareto_time = t.elapsed

        self.tasklist.append(task)
def randfeatsuntied(self):
    """Run many toy iterations with random probabilities and random features.

    For each of ``self.ntimes`` repetitions a fresh HMM is configured by
    ``_set_params_randprobsuntied`` and new features are drawn with
    ``_gen_feats_random``.  Every task stores the brute-force enumeration
    and the Pareto frontier along with the benchmark timings.

    Side effects: replaces ``self.tasklist`` with the newly built tasks.
    """
    self.tasklist = []
    # NOTE(review): this value is always overwritten inside the loop; the
    # call is kept only in case ``get_feats_standard`` has side effects.
    feats = self.get_feats_standard()

    # Repeat for all the tasks described
    for taskid in range(self.ntimes):
        hmm = HMM()
        self._set_params_randprobsuntied(hmm)
        cmrf = CMRF(hmm)
        feats = self._gen_feats_random()

        # str(STUDY) keeps the name building consistent with ``ziftied`` and
        # avoids a TypeError should STUDY not be a string.
        task_name = '_'.join(['sim' + str(STUDY), self.name, str(taskid)])
        task = Task(task_name, cmrf, feats)

        # NOTE(review): the original indentation was lost; only the measured
        # call is assumed to sit inside each ``with benchmark`` block, and
        # ``t.elapsed`` is read after the block exits -- confirm.

        # Run brute force to enumerate the frontier
        with benchmark(task.name + 'brute') as t:
            seq, energies = self.bruteforce(cmrf, feats)
        task.all_seq = seq
        task.all_seq_energy = energies
        task.brute_time = t.elapsed

        # Now compute the Pareto frontier
        with benchmark(task.name + 'pareto') as t:
            task.frontier, task.frontier_energy = pareto_frontier(cmrf, feats)
        if self.plot_all:
            task.plot_frontier()
        task.pareto_time = t.elapsed

        self.tasklist.append(task)
hmm.length = 12 hmm.dims = [(2, 3)] * hmm.length # (latent,emit) dimspace hmm.emit = [[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2]]] * hmm.length hmm.trans = [[[0.7, 0.3], [0.3, 0.7]]] * hmm.length hmm.seqmap = [{"a": 0, "b": 1}] * hmm.length hmm.seqmap2 = [{0: "a", 1: "b"}] * hmm.length hmm.featmap = [{"H": 0, "B": 1, "L": 2}] * hmm.length hmm.initprob = [0.5, 0.5] hmm.trained = True if __name__ == "__main__": hmm = HMM() # Set the params of the h,, set_params_hmm_exp1(hmm) cmrf = CMRF(hmm) seq1 = "a" * 12 feat1 = "HHHHLLLLHHHH" seq2 = "b" * 12 feat2 = "BBBBLLLLBBBB" # Plot the entire sequence space ll_list1, ll_list2 = [], [] for seq in product("ab", repeat=12): ll_list1.append(cmrf.score(seq, feat1)) ll_list2.append(cmrf.score(seq, feat2)) # Find the pareto frontier frontier, frontier_energy = pareto_frontier(cmrf, [feat1, feat2]) pl.figure()
def set_params_hmm_exp1(hmm):
    """Fill *hmm* in place with the fixed parameters of sim experiment 1.

    Sets ``length`` to 12 and gives every position the same emission table,
    transition table and symbol maps.  As in the original, each per-position
    list repeats one shared object rather than holding independent copies.
    Finally marks the model as trained.
    """
    emission_rows = [[0.6, 0.2, 0.2], [0.2, 0.6, 0.2]]
    transition_rows = [[0.7, 0.3], [0.3, 0.7]]
    symbol_to_index = {'a': 0, 'b': 1}
    index_to_symbol = {0: 'a', 1: 'b'}
    feature_to_index = {'H': 0, 'B': 1, 'L': 2}

    hmm.length = 12
    hmm.dims = [(2, 3)] * hmm.length  # (latent, emit) dimspace
    hmm.emit = [emission_rows] * hmm.length
    hmm.trans = [transition_rows] * hmm.length
    hmm.seqmap = [symbol_to_index] * hmm.length
    hmm.seqmap2 = [index_to_symbol] * hmm.length
    hmm.featmap = [feature_to_index] * hmm.length
    hmm.initprob = [0.5, 0.5]
    hmm.trained = True


if __name__ == '__main__':
    from pprint import pprint

    # Build the model and set the params of the hmm
    hmm = HMM()
    set_params_hmm_exp1(hmm)
    cmrf = CMRF(hmm)

    seq1 = 'a' * 12
    feat1 = 'HHHHLLLLHHHH'
    seq2 = 'b' * 12
    feat2 = 'BBBBLLLLBBBB'

    # Find the pareto frontier
    frontier, frontier_energy = pareto_frontier(cmrf, [feat1, feat2])

    pprint("Frontier is ")
    pprint(frontier)
    pprint("Frontier Energies are")
    pprint(frontier_energy)
hmm.trans.append([]) for j, aa1 in enumerate(hmm.alphabet): hmm.trans[-1].append([]) for k, aa2 in enumerate(hmm.alphabet): val = (counts2[i].get(aa1 + aa2, 0) + self.smoothfac) / ( counts[i].get(aa1, 0) + self.smoothfac * len(hmm.alphabet)) hmm.trans[-1][-1].append(val) return hmm if __name__ == '__main__': b = BoostedHMM() hmm1, hmm2 = b.hmm1, b.hmm2 # Set the params of the hmm cmrf1 = CMRF(hmm1) cmrf2 = CMRF(hmm2) feat = 'HHHHHHHHHHHH' # Plot the entire sequence space ll_list1, ll_list2 = [], [] for seq in product('ab', repeat=12): ll_list1.append(cmrf1.score(seq, feat)) ll_list2.append(cmrf2.score(seq, feat)) ll_list3, ll_list4 = [], [] for seq in b.kseqlist: ll_list3.append(cmrf1.score(seq, feat)) ll_list4.append(cmrf2.score(seq, feat)) pl.figure() pl.plot(ll_list1, ll_list2, 'b*')
hmm.length = 12 hmm.dims = [(2, 3)] * hmm.length # (latent,emit) dimspace hmm.emit = [[gen_random_dist(3), gen_random_dist(3)]] * hmm.length hmm.trans = [[gen_random_dist(2), gen_random_dist(2)]] * hmm.length hmm.seqmap = [{'a': 0, 'b': 1}] * hmm.length hmm.seqmap2 = [{0: 'a', 1: 'b'}] * hmm.length hmm.featmap = [{'H': 0, 'B': 1, 'L': 2}] * hmm.length hmm.initprob = [0.5, 0.5] hmm.trained = True if __name__ == '__main__': hmm = HMM() # Set the params of the h,, set_params_hmm_exp1(hmm) cmrf = CMRF(hmm) seq1 = 'a' * 12 feat1 = 'HHHHLLLLHHHH' seq2 = 'b' * 12 feat2 = 'BBBBLLLLBBBB' ### DEBUG import pickle cmrf = pickle.load(open('cmrf.pkl')) # Plot the entire sequence space ll_list1, ll_list2 = [], [] seq_list = ["".join(s) for s in product('ab', repeat=12)] for seq in seq_list: ll_list1.append(cmrf.score(seq, feat1)) ll_list2.append(cmrf.score(seq, feat2))
hmm.trans = [] for i in range(len(seqlist[0])-1) : hmm.trans.append([]) for j,aa1 in enumerate(hmm.alphabet) : hmm.trans[-1].append([]) for k,aa2 in enumerate(hmm.alphabet) : val = (counts2[i].get(aa1+aa2,0)+self.smoothfac) / (counts[i].get(aa1,0)+self.smoothfac*len(hmm.alphabet)) hmm.trans[-1][-1].append(val) return hmm if __name__ == '__main__' : b = BoostedHMM() hmm1,hmm2 = b.hmm1,b.hmm2 # Set the params of the hmm cmrf1 = CMRF(hmm1) cmrf2 = CMRF(hmm2) feat = 'HHHHHHHHHHHH' # Plot the entire sequence space ll_list1,ll_list2 = [],[] for seq in product('ab',repeat=12): ll_list1.append(cmrf1.score(seq,feat)) ll_list2.append(cmrf2.score(seq,feat)) ll_list3,ll_list4 = [],[] for seq in b.kseqlist: ll_list3.append(cmrf1.score(seq,feat)) ll_list4.append(cmrf2.score(seq,feat)) pl.figure()