コード例 #1
0
ファイル: Model.py プロジェクト: KyriacosShiarli/MDP
	def buildTransitionFunction(self,bin_samples,learn =True):
		def transition_smart():
			self.transition = {}
			tot_states=self.transition["tot_states"] = transition_f.shape[1]
			tot_actions=self.transition["tot_actions"] =transition_f.shape[0]
			self.transition["backward"] = [{} for j in xrange(tot_states*tot_actions)]
			self.transition["forward"] = [{} for j in xrange(tot_states*tot_actions)]
			idx = np.transpose(np.array(np.nonzero(transition_f)))
			for i in idx:
				self.transition["backward"][i[0] + i[1]*tot_actions][str(i[2])] = transition_f[i[0],i[1],i[2]]
				self.transition["forward"][i[0] + i[2]*tot_actions][str(i[1])] = transition_f[i[0],i[1],i[2]]

		transition_f = np.zeros([self.disc.tot_actions,self.disc.tot_states,self.disc.tot_states])
		for i in xrange(self.disc.tot_states):
			for j in xrange(self.disc.tot_actions):
				bins = self.disc.stateToBins(i)
				for k in xrange(bin_samples):
					assert bin_samples != 0
					if bin_samples==1:
						samp = False
					else:
						samp = True
					quantity = self.disc.binsToQuantity(bins,sample = samp)

					action = self.disc.indexToAction(j)
					next_quantity = staticGroupSimple(quantity,action)
					next_state = self.disc.quantityToState(next_quantity)
					transition_f[j,i,next_state] += 1
		if learn ==True:
			trans = learn_tran()
			transition_f =np.add( transition_f , trans)
		row_sums = np.sum(transition_f,axis=2)
		transition_f = transition_f/row_sums[:,:,np.newaxis]
		self.transition_f = transition_f
		transition_smart()
コード例 #2
0
    def buildTransitionFunction(self, bin_samples, learn=True):
        def transition_smart():
            self.transition = Transition()
            tot_states = self.transition.tot_states = transition_f.shape[1]
            tot_actions = self.transition.tot_actions = transition_f.shape[0]
            self.transition.backward = [{} for j in xrange(tot_states *
                                                           tot_actions)]
            self.transition.forward = [{} for j in xrange(tot_states *
                                                          tot_actions)]
            idx = np.transpose(np.array(np.nonzero(transition_f)))
            for i in idx:
                self.transition.backward[i[0] + i[1] * tot_actions][str(
                    i[2])] = transition_f[i[0], i[1], i[2]]
                self.transition.forward[i[0] + i[2] * tot_actions][str(
                    i[1])] = transition_f[i[0], i[1], i[2]]
            print self.transition.backward

        #print "Got Here estemated"
        transition_f = np.zeros([
            self.disc.tot_actions, self.disc.tot_states, self.disc.tot_states
        ])
        transtion_test = np.zeros([
            self.disc.tot_actions, self.disc.tot_states, self.disc.tot_states
        ])
        estimators = learn_correction(10, 10)
        for i in xrange(self.disc.tot_states):
            for j in xrange(self.disc.tot_actions):
                bins = self.disc.stateToBins(i)
                for k in xrange(bin_samples):
                    assert bin_samples != 0
                    if bin_samples == 1:
                        samp = False
                    else:
                        samp = True
                    quantity = self.disc.binsToQuantity(bins, sample=samp)
                    action = self.disc.indexToAction(j)
                    correction = predict_next(quantity, action, estimators)
                    correction[0] = np.arcsin(correction[0]) * 2
                    next_quantity = staticGroupSimple2(quantity,
                                                       action) - correction
                    #next_quantity2 = staticGroupSimple2(quantity,action)
                    #if np.sum(next_quantity[:1]-next_quantity2[:1])>1:
                    #	print "Quantity",quantity
                    #	print "ACTION",action
                    #	print next_quantity[:2],next_quantity2[:2]
                    next_state = self.disc.quantityToState(next_quantity)
                    #next2 = self.disc.quantityToState(next_quantity2)
                    #print "state",ic
                    #print "actions",j
                    transition_f[j, i, next_state] += 1
        if learn == True:
            trans = learn_tran()
            transition_f = np.add(transition_f, 2 * trans)
        row_sums = np.sum(transition_f, axis=2)
        transition_f = transition_f / row_sums[:, :, np.newaxis]
        self.transition_f = transition_f
        transition_smart()
コード例 #3
0
ファイル: Model.py プロジェクト: KyriacosShiarli/MDP
	def buildTransitionFunction(self,bin_samples,learn =True):
		def transition_smart():
			self.transition = Transition()
			tot_states=self.transition.tot_states = transition_f.shape[1]
			tot_actions=self.transition.tot_actions =transition_f.shape[0]
			self.transition.backward = [{} for j in xrange(tot_states*tot_actions)]
			self.transition.forward = [{} for j in xrange(tot_states*tot_actions)]
			idx = np.transpose(np.array(np.nonzero(transition_f)))
			for i in idx:
				self.transition.backward[i[0] + i[1]*tot_actions][str(i[2])] = transition_f[i[0],i[1],i[2]]
				self.transition.forward[i[0] + i[2]*tot_actions][str(i[1])] = transition_f[i[0],i[1],i[2]]
			print self.transition.backward
		#print "Got Here estemated"	
		transition_f = np.zeros([self.disc.tot_actions,self.disc.tot_states,self.disc.tot_states])
		transtion_test = np.zeros([self.disc.tot_actions,self.disc.tot_states,self.disc.tot_states])
		estimators = learn_correction(10,10)
		for i in xrange(self.disc.tot_states):
			for j in xrange(self.disc.tot_actions):
				bins = self.disc.stateToBins(i)	
				for k in xrange(bin_samples):
					assert bin_samples != 0
					if bin_samples==1:
						samp = False
					else:
						samp = True
					quantity = self.disc.binsToQuantity(bins,sample = samp)
					action = self.disc.indexToAction(j)
					correction = predict_next(quantity,action,estimators)
					correction[0] = np.arcsin(correction[0])*2
					next_quantity = staticGroupSimple2(quantity,action) - correction
					#next_quantity2 = staticGroupSimple2(quantity,action)
					#if np.sum(next_quantity[:1]-next_quantity2[:1])>1:
					#	print "Quantity",quantity
					#	print "ACTION",action
					#	print next_quantity[:2],next_quantity2[:2]
					next_state = self.disc.quantityToState(next_quantity)
					#next2 = self.disc.quantityToState(next_quantity2)
					#print "state",ic
					#print "actions",j
					transition_f[j,i,next_state] += 1
		if learn ==True:
			trans = learn_tran()
			transition_f =np.add( transition_f ,2* trans)
		row_sums = np.sum(transition_f,axis=2)
		transition_f = transition_f/row_sums[:,:,np.newaxis]
		self.transition_f = transition_f
		transition_smart()
コード例 #4
0
    def buildTransitionFunction(self, bin_samples, learn=True):
        def transition_smart():
            self.transition = {}
            tot_states = self.transition["tot_states"] = transition_f.shape[1]
            tot_actions = self.transition["tot_actions"] = transition_f.shape[
                0]
            self.transition["backward"] = [{} for j in xrange(tot_states *
                                                              tot_actions)]
            self.transition["forward"] = [{} for j in xrange(tot_states *
                                                             tot_actions)]
            idx = np.transpose(np.array(np.nonzero(transition_f)))
            for i in idx:
                self.transition["backward"][i[0] + i[1] * tot_actions][str(
                    i[2])] = transition_f[i[0], i[1], i[2]]
                self.transition["forward"][i[0] + i[2] * tot_actions][str(
                    i[1])] = transition_f[i[0], i[1], i[2]]

        transition_f = np.zeros([
            self.disc.tot_actions, self.disc.tot_states, self.disc.tot_states
        ])
        for i in xrange(self.disc.tot_states):
            for j in xrange(self.disc.tot_actions):
                bins = self.disc.stateToBins(i)
                for k in xrange(bin_samples):
                    assert bin_samples != 0
                    if bin_samples == 1:
                        samp = False
                    else:
                        samp = True
                    quantity = self.disc.binsToQuantity(bins, sample=samp)

                    action = self.disc.indexToAction(j)
                    next_quantity = staticGroupSimple(quantity, action)
                    next_state = self.disc.quantityToState(next_quantity)
                    transition_f[j, i, next_state] += 1
        if learn == True:
            trans = learn_tran()
            transition_f = np.add(transition_f, trans)
        row_sums = np.sum(transition_f, axis=2)
        transition_f = transition_f / row_sums[:, :, np.newaxis]
        self.transition_f = transition_f
        transition_smart()