Beispiel #1
0
    def featspacelen(self):
        """Run the feature-space / sequence-length experiment.

        Reads ``featspace`` and ``seqlen`` from ``self.kwdargs`` and, for each
        of ``self.ntimes`` repetitions, builds a fresh HMM/CMRF pair with
        newly generated features, optionally runs ``self.bruteforce`` (when
        ``self.kwdargs['run_brute']`` is truthy), computes the Pareto
        frontier, and appends the resulting ``Task`` to ``self.tasklist``.
        Both timed steps store their elapsed time on the task.
        """
        self.tasklist = []
        n_feat_states = self.kwdargs['featspace']
        n_seq_states = 20
        n_positions = self.kwdargs['seqlen']
        # The same (seqspace, featspace) tuple is repeated for every position.
        position_dims = [(n_seq_states, n_feat_states)] * n_positions

        for rep in range(self.ntimes):
            # Every repetition gets its own model and its own features.
            model = HMM()
            self._set_params_generic(model, n_positions, position_dims)
            field = CMRF(model)
            observed = self._gen_feats_generic(n_positions, n_feat_states)

            label = '_'.join((
                'sim' + STUDY,
                self.name,
                str(n_positions),
                str(n_feat_states),
                str(rep),
            ))
            job = Task(label, field, observed)

            # Optional brute-force pass, timed separately.
            if self.kwdargs['run_brute']:
                with benchmark(job.name + 'brute') as brute_timer:
                    all_seqs, all_energies = self.bruteforce(field, observed)
                job.all_seq = all_seqs
                job.all_seq_energy = all_energies
                job.brute_time = brute_timer.elapsed

            # Pareto frontier computation, timed separately.
            with benchmark(job.name + 'pareto') as pareto_timer:
                job.frontier, job.frontier_energy = pareto_frontier(
                    field, observed)
            if self.plot_all:
                job.plot_frontier(frontier_only=True)
            job.pareto_time = pareto_timer.elapsed

            self.tasklist.append(job)
Beispiel #2
0
	def ziftied(self):
		"""Build and solve the ``ziftied`` tasks.

		A single HMM/CMRF pair is configured once via
		``self._set_params_ziftied`` and shared by all ``self.ntimes`` tasks.
		For each task the method calls ``self.sample``, computes the Pareto
		frontier, stores both results and their elapsed times on the task, and
		appends the task to ``self.tasklist``.
		"""
		self.tasklist = []
		observed = self.kwdargs['feats']
		# Looked up but not used below; kept so the lookup itself still happens.
		_unused_weights = self.kwdargs['weights']

		model = HMM()
		self._set_params_ziftied(model)
		field = CMRF(model)

		for rep in range(self.ntimes):
			label = '_'.join(('bio' + str(STUDY), self.name, str(rep)))
			job = Task(label, field, observed)

			# Sampling pass, timed separately.
			with benchmark(job.name + 'sample') as sample_timer:
				sampled_seqs, sampled_energies = self.sample(field, observed)
			job.sample_seq = sampled_seqs
			job.sample_seq_energy = sampled_energies
			job.sample_time = sample_timer.elapsed

			# Pareto frontier computation, timed separately.
			with benchmark(job.name + 'pareto') as pareto_timer:
				job.frontier, job.frontier_energy = pareto_frontier(
					field, observed)
			if self.plot_all:
				job.plot_frontier(frontier_only=True, plot_samples=True)
			job.pareto_time = pareto_timer.elapsed

			self.tasklist.append(job)
Beispiel #3
0
    def toy(self):
        """Build and solve the toy simulation tasks.

        One HMM/CMRF pair is configured via ``self._set_params_toy`` and
        shared by all ``self.ntimes`` tasks. Each task runs
        ``self.bruteforce`` and then the Pareto frontier computation, stores
        the results and elapsed times on the task, and is appended to
        ``self.tasklist``.
        """
        self.tasklist = []
        observed = self.get_feats_standard()

        model = HMM()
        self._set_params_toy(model)
        field = CMRF(model)

        for rep in range(self.ntimes):
            label = '_'.join(('sim' + STUDY, self.name, str(rep)))
            job = Task(label, field, observed)

            # Brute-force pass, timed separately.
            with benchmark(job.name + 'brute') as brute_timer:
                all_seqs, all_energies = self.bruteforce(field, observed)
            job.all_seq = all_seqs
            job.all_seq_energy = all_energies
            job.brute_time = brute_timer.elapsed

            # Pareto frontier computation, timed separately.
            with benchmark(job.name + 'pareto') as pareto_timer:
                job.frontier, job.frontier_energy = pareto_frontier(
                    field, observed)
            if self.plot_all:
                job.plot_frontier()
            job.pareto_time = pareto_timer.elapsed

            self.tasklist.append(job)
Beispiel #4
0
    def randfeatsuntied(self):
        """Build and solve ``self.ntimes`` tasks with per-task random features.

        Every repetition creates a new HMM configured by
        ``self._set_params_randprobsuntied``, wraps it in a CMRF, draws
        features with ``self._gen_feats_random``, runs ``self.bruteforce`` and
        the Pareto frontier computation, and appends the populated task to
        ``self.tasklist``.
        """
        self.tasklist = []
        # NOTE(review): this value is replaced inside the loop before it is
        # ever read; the call is kept in case it has side effects -- confirm.
        observed = self.get_feats_standard()

        for rep in range(self.ntimes):
            model = HMM()
            self._set_params_randprobsuntied(model)
            field = CMRF(model)
            observed = self._gen_feats_random()

            label = '_'.join(('sim' + STUDY, self.name, str(rep)))
            job = Task(label, field, observed)

            # Brute-force pass, timed separately.
            with benchmark(job.name + 'brute') as brute_timer:
                all_seqs, all_energies = self.bruteforce(field, observed)
            job.all_seq = all_seqs
            job.all_seq_energy = all_energies
            job.brute_time = brute_timer.elapsed

            # Pareto frontier computation, timed separately.
            with benchmark(job.name + 'pareto') as pareto_timer:
                job.frontier, job.frontier_energy = pareto_frontier(
                    field, observed)
            if self.plot_all:
                job.plot_frontier()
            job.pareto_time = pareto_timer.elapsed

            self.tasklist.append(job)
Beispiel #5
0
    hmm.length = 12
    hmm.dims = [(2, 3)] * hmm.length  # (latent,emit) dimspace
    hmm.emit = [[[0.6, 0.2, 0.2], [0.2, 0.6, 0.2]]] * hmm.length
    hmm.trans = [[[0.7, 0.3], [0.3, 0.7]]] * hmm.length
    hmm.seqmap = [{"a": 0, "b": 1}] * hmm.length
    hmm.seqmap2 = [{0: "a", 1: "b"}] * hmm.length
    hmm.featmap = [{"H": 0, "B": 1, "L": 2}] * hmm.length
    hmm.initprob = [0.5, 0.5]
    hmm.trained = True


# Script entry point: build the experiment-1 model, score every candidate
# sequence against two feature strings, and compute the Pareto frontier.
if __name__ == "__main__":
    hmm = HMM()
    # Set the params of the HMM
    set_params_hmm_exp1(hmm)
    cmrf = CMRF(hmm)
    # Two length-12 reference sequences and two length-12 feature strings.
    seq1 = "a" * 12
    feat1 = "HHHHLLLLHHHH"
    seq2 = "b" * 12
    feat2 = "BBBBLLLLBBBB"

    # Score the entire sequence space: every length-12 combination of 'a'/'b'
    # is scored once against each feature string.
    ll_list1, ll_list2 = [], []
    for seq in product("ab", repeat=12):
        ll_list1.append(cmrf.score(seq, feat1))
        ll_list2.append(cmrf.score(seq, feat2))

    # Find the pareto frontier
    frontier, frontier_energy = pareto_frontier(cmrf, [feat1, feat2])

    # Start a new figure (pl is presumably matplotlib's pylab -- confirm).
    pl.figure()
Beispiel #6
0
def set_params_hmm_exp1(hmm, length=12):
    """Set the parameters of an HMM for simulation experiment 1.

    Configures ``hmm`` in place with a two-symbol sequence alphabet
    ('a'/'b'), a three-symbol feature alphabet ('H'/'B'/'L'), a uniform
    initial distribution, and the same emission and transition tables at
    every position, then marks the model as trained.

    Args:
        hmm: Object to configure. Only attribute assignment is required.
        length: Number of positions in the chain. Defaults to 12, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        None. ``hmm`` is modified in place.

    Note:
        The per-position lists are built with list repetition, so all
        positions refer to the same underlying table / dict objects.
        Mutating one position's entry in place therefore changes every
        position.
    """
    emission = [[0.6, 0.2, 0.2], [0.2, 0.6, 0.2]]
    transition = [[0.7, 0.3], [0.3, 0.7]]

    hmm.length = length
    hmm.dims = [(2, 3)] * length  # (latent, emit) dimspace
    hmm.emit = [emission] * length
    hmm.trans = [transition] * length
    # Symbol <-> index lookups for the sequence and feature alphabets.
    hmm.seqmap = [{'a': 0, 'b': 1}] * length
    hmm.seqmap2 = [{0: 'a', 1: 'b'}] * length
    hmm.featmap = [{'H': 0, 'B': 1, 'L': 2}] * length
    hmm.initprob = [0.5, 0.5]
    hmm.trained = True


# Script entry point: build the experiment-1 model and print its Pareto
# frontier for two feature strings.
if __name__ == '__main__':
    from pprint import pprint

    # Configure the HMM and wrap it in a CMRF.
    hmm = HMM()
    set_params_hmm_exp1(hmm)
    cmrf = CMRF(hmm)

    # Two length-12 reference sequences and two length-12 feature strings.
    seq1, seq2 = 'a' * 12, 'b' * 12
    feat1, feat2 = 'HHHHLLLLHHHH', 'BBBBLLLLBBBB'

    # Compute the frontier, then print each heading followed by its value.
    frontier, frontier_energy = pareto_frontier(cmrf, [feat1, feat2])
    for item in ("Frontier is ", frontier,
                 "Frontier Energies are", frontier_energy):
        pprint(item)
Beispiel #7
0
            hmm.trans.append([])
            for j, aa1 in enumerate(hmm.alphabet):
                hmm.trans[-1].append([])
                for k, aa2 in enumerate(hmm.alphabet):
                    val = (counts2[i].get(aa1 + aa2, 0) + self.smoothfac) / (
                        counts[i].get(aa1, 0) +
                        self.smoothfac * len(hmm.alphabet))
                    hmm.trans[-1][-1].append(val)
        return hmm


# Script entry point: compare the scores that the two models of a BoostedHMM
# assign to the same sequences under one feature string.
if __name__ == '__main__':
    b = BoostedHMM()
    hmm1, hmm2 = b.hmm1, b.hmm2
    # Wrap each HMM in its own CMRF
    cmrf1 = CMRF(hmm1)
    cmrf2 = CMRF(hmm2)
    feat = 'HHHHHHHHHHHH'

    # Score the entire sequence space: every length-12 combination of 'a'/'b'
    # is scored once under each model.
    ll_list1, ll_list2 = [], []
    for seq in product('ab', repeat=12):
        ll_list1.append(cmrf1.score(seq, feat))
        ll_list2.append(cmrf2.score(seq, feat))
    # Score the sequences held in b.kseqlist the same way.
    ll_list3, ll_list4 = [], []
    for seq in b.kseqlist:
        ll_list3.append(cmrf1.score(seq, feat))
        ll_list4.append(cmrf2.score(seq, feat))

    # Plot model-1 scores against model-2 scores for the full sequence space
    # (pl is presumably matplotlib's pylab -- confirm).
    pl.figure()
    pl.plot(ll_list1, ll_list2, 'b*')
Beispiel #8
0
    hmm.length = 12
    hmm.dims = [(2, 3)] * hmm.length  # (latent,emit) dimspace
    hmm.emit = [[gen_random_dist(3), gen_random_dist(3)]] * hmm.length
    hmm.trans = [[gen_random_dist(2), gen_random_dist(2)]] * hmm.length
    hmm.seqmap = [{'a': 0, 'b': 1}] * hmm.length
    hmm.seqmap2 = [{0: 'a', 1: 'b'}] * hmm.length
    hmm.featmap = [{'H': 0, 'B': 1, 'L': 2}] * hmm.length
    hmm.initprob = [0.5, 0.5]
    hmm.trained = True


# Script entry point: score every candidate sequence against two feature
# strings, using a CMRF loaded from disk (debug override).
if __name__ == '__main__':
    hmm = HMM()
    # Set the params of the HMM
    set_params_hmm_exp1(hmm)
    cmrf = CMRF(hmm)
    # Two length-12 reference sequences and two length-12 feature strings.
    seq1 = 'a' * 12
    feat1 = 'HHHHLLLLHHHH'
    seq2 = 'b' * 12
    feat2 = 'BBBBLLLLBBBB'

    ### DEBUG
    # Replaces the CMRF built above with one loaded from 'cmrf.pkl'.
    # The file is opened in binary mode, as pickle requires, and the handle
    # is closed as soon as loading finishes.
    # NOTE: unpickling can execute arbitrary code; only load a cmrf.pkl that
    # comes from a trusted source.
    import pickle
    with open('cmrf.pkl', 'rb') as pkl_file:
        cmrf = pickle.load(pkl_file)

    # Score the entire sequence space: every length-12 string over 'a'/'b'
    # is scored once against each feature string.
    ll_list1, ll_list2 = [], []
    seq_list = ["".join(s) for s in product('ab', repeat=12)]
    for seq in seq_list:
        ll_list1.append(cmrf.score(seq, feat1))
        ll_list2.append(cmrf.score(seq, feat2))
Beispiel #9
0
		hmm.trans = []

		for i in range(len(seqlist[0])-1) :
			hmm.trans.append([])
			for j,aa1 in enumerate(hmm.alphabet) : 
				hmm.trans[-1].append([])
				for k,aa2 in enumerate(hmm.alphabet) :
					val = (counts2[i].get(aa1+aa2,0)+self.smoothfac) / (counts[i].get(aa1,0)+self.smoothfac*len(hmm.alphabet))
					hmm.trans[-1][-1].append(val)
		return hmm

# Script entry point: compare the scores that the two models of a BoostedHMM
# assign to the same sequences under one feature string.
if __name__ == '__main__' : 
	b = BoostedHMM()
	hmm1,hmm2 = b.hmm1,b.hmm2
	# Wrap each HMM in its own CMRF
	cmrf1 = CMRF(hmm1)
	cmrf2 = CMRF(hmm2)
	feat = 'HHHHHHHHHHHH'

	# Score the entire sequence space: every length-12 combination of 'a'/'b'
	# is scored once under each model.
	ll_list1,ll_list2 = [],[]
	for seq in product('ab',repeat=12):	
		ll_list1.append(cmrf1.score(seq,feat))
		ll_list2.append(cmrf2.score(seq,feat))
	# Score the sequences held in b.kseqlist the same way.
	ll_list3,ll_list4 = [],[]
	for seq in b.kseqlist:	
		ll_list3.append(cmrf1.score(seq,feat))
		ll_list4.append(cmrf2.score(seq,feat))


	# Start a new figure (pl is presumably matplotlib's pylab -- confirm).
	pl.figure()