Esempio n. 1
0
    def buildTransitionFunction(self, bin_samples, learn=True):
        """Estimate the discrete transition model and store it on ``self``.

        For every (state, action) pair, ``bin_samples`` quantities are
        obtained from the state's bins via ``self.disc.binsToQuantity``.
        Each quantity is advanced with ``staticGroupSimple2`` minus the
        correction returned by ``predict_next`` and mapped back to a discrete
        next state, which is counted.  The counts are optionally increased by
        ``2 * learn_tran()`` and then normalised along the next-state axis.

        Side effects:
            self.transition_f -- normalised array indexed
                                 [action, state, next_state].
            self.transition   -- new ``Transition`` object whose ``backward``
                                 and ``forward`` lists hold one dict per
                                 (action, state) slot with the non-zero
                                 entries of ``transition_f``.
            The ``backward`` list is printed to stdout.

        Args:
            bin_samples: number of quantities evaluated per (state, action)
                pair; must be at least 1.  With exactly 1,
                ``binsToQuantity`` is called with ``sample=False``.
            learn: when truthy, ``2 * learn_tran()`` is added to the counts
                before normalisation.

        Raises:
            ValueError: if ``bin_samples`` is less than 1.  The original
                ``assert`` lived inside the sampling loop and could never
                fire, so a zero count silently produced all-zero rows.
        """
        if bin_samples < 1:
            raise ValueError(
                "bin_samples must be at least 1, got %r" % (bin_samples,))

        disc = self.disc
        n_actions = disc.tot_actions
        n_states = disc.tot_states
        counts = np.zeros([n_actions, n_states, n_states])
        estimators = learn_correction(10, 10)
        # A single sample per bin uses the non-sampling path of binsToQuantity.
        use_sampling = bin_samples != 1

        for state in xrange(n_states):
            # Hoisted: depends on the state only (assumes stateToBins and
            # indexToAction are side-effect-free lookups -- TODO confirm).
            bins = disc.stateToBins(state)
            for action_idx in xrange(n_actions):
                action = disc.indexToAction(action_idx)
                for _ in xrange(bin_samples):
                    quantity = disc.binsToQuantity(bins, sample=use_sampling)
                    correction = predict_next(quantity, action, estimators)
                    # First component is converted with 2 * arcsin before it
                    # is subtracted from the simulated next quantity.
                    correction[0] = np.arcsin(correction[0]) * 2
                    next_quantity = staticGroupSimple2(quantity,
                                                       action) - correction
                    next_state = disc.quantityToState(next_quantity)
                    counts[action_idx, state, next_state] += 1

        if learn:
            counts = np.add(counts, 2 * learn_tran())

        # Normalise every [action, state, :] row by its sum.
        row_sums = np.sum(counts, axis=2)
        transition_f = counts / row_sums[:, :, np.newaxis]
        self.transition_f = transition_f

        # Sparse view: one dict per (action, state) slot, keyed by the string
        # form of the other state index.
        transition = self.transition = Transition()
        tot_states = transition.tot_states = transition_f.shape[1]
        tot_actions = transition.tot_actions = transition_f.shape[0]
        transition.backward = [{} for _ in xrange(tot_states * tot_actions)]
        transition.forward = [{} for _ in xrange(tot_states * tot_actions)]
        for act, src, dst in np.argwhere(transition_f):
            value = transition_f[act, src, dst]
            transition.backward[act + src * tot_actions][str(dst)] = value
            transition.forward[act + dst * tot_actions][str(src)] = value
        # Debug dump kept from the original; the parenthesised form prints the
        # same under Python 2 and also parses under Python 3.
        print(transition.backward)
Esempio n. 2
0
	def buildTransitionFunction(self,bin_samples,learn =True):
		"""Estimate the discrete transition model and store it on ``self``.

		For every (state, action) pair, ``bin_samples`` quantities are
		obtained from the state's bins via ``self.disc.binsToQuantity``.
		Each quantity is advanced with ``staticGroupSimple2`` minus the
		correction returned by ``predict_next`` and mapped back to a discrete
		next state, which is counted.  The counts are optionally increased by
		``2 * learn_tran()`` and then normalised along the next-state axis.

		Side effects:
			self.transition_f -- normalised array indexed
			                     [action, state, next_state].
			self.transition   -- new ``Transition`` object whose ``backward``
			                     and ``forward`` lists hold one dict per
			                     (action, state) slot with the non-zero
			                     entries of ``transition_f``.
			The ``backward`` list is printed to stdout.

		Args:
			bin_samples: number of quantities evaluated per (state, action)
				pair; must be at least 1.  With exactly 1,
				``binsToQuantity`` is called with ``sample=False``.
			learn: when truthy, ``2 * learn_tran()`` is added to the counts
				before normalisation.

		Raises:
			ValueError: if ``bin_samples`` is less than 1.  The original
				``assert`` lived inside the sampling loop and could never
				fire, so a zero count silently produced all-zero rows.
		"""
		if bin_samples < 1:
			raise ValueError("bin_samples must be at least 1, got %r" % (bin_samples,))

		disc = self.disc
		n_actions = disc.tot_actions
		n_states = disc.tot_states
		counts = np.zeros([n_actions,n_states,n_states])
		estimators = learn_correction(10,10)
		# A single sample per bin uses the non-sampling path of binsToQuantity.
		use_sampling = bin_samples != 1

		for state in xrange(n_states):
			# Hoisted: depends on the state only (assumes stateToBins and
			# indexToAction are side-effect-free lookups -- TODO confirm).
			bins = disc.stateToBins(state)
			for action_idx in xrange(n_actions):
				action = disc.indexToAction(action_idx)
				for _ in xrange(bin_samples):
					quantity = disc.binsToQuantity(bins,sample = use_sampling)
					correction = predict_next(quantity,action,estimators)
					# First component is converted with 2 * arcsin before it
					# is subtracted from the simulated next quantity.
					correction[0] = np.arcsin(correction[0])*2
					next_quantity = staticGroupSimple2(quantity,action) - correction
					next_state = disc.quantityToState(next_quantity)
					counts[action_idx,state,next_state] += 1

		if learn:
			counts = np.add(counts,2*learn_tran())

		# Normalise every [action, state, :] row by its sum.
		row_sums = np.sum(counts,axis=2)
		transition_f = counts/row_sums[:,:,np.newaxis]
		self.transition_f = transition_f

		# Sparse view: one dict per (action, state) slot, keyed by the string
		# form of the other state index.
		transition = self.transition = Transition()
		tot_states = transition.tot_states = transition_f.shape[1]
		tot_actions = transition.tot_actions = transition_f.shape[0]
		transition.backward = [{} for _ in xrange(tot_states*tot_actions)]
		transition.forward = [{} for _ in xrange(tot_states*tot_actions)]
		for act, src, dst in np.argwhere(transition_f):
			value = transition_f[act,src,dst]
			transition.backward[act + src*tot_actions][str(dst)] = value
			transition.forward[act + dst*tot_actions][str(src)] = value
		# Debug dump kept from the original; the parenthesised form prints the
		# same under Python 2 and also parses under Python 3.
		print(transition.backward)
Esempio n. 3
0
		else:
			print example.actions[n]
			states_kin = np.vstack([states_kin,predict_next(states_kin[n,:],example.actions[n],estimators)])
	ex = np.array(example.states[startpoint::])
	x = range(len(ex))

	f, axarr = plt.subplots(2,sharex = True)
	axarr[0].scatter(x,ex[:,1],color = "blue",label = "data")
	axarr[0].scatter(x,states_kin[:,1],color = "green",alpha = 0.4, label = "kinematic")
	axarr[0].legend(bbox_to_anchor=(1., 1,0.,-0.06),loc=1)

	axarr[1].scatter(x,ex[:,0],color = "blue")
	axarr[1].scatter(x,states_kin[:,0],color = "green", alpha = 0.4)

	axarr[1].set_ylabel("Angle/rad")
	axarr[0].set_ylabel("Distance")
	axarr[1].set_xlabel("Example")
	plt.show()


#trajectoryCompare()

#w_fold1 =([-1.06272686, -1.26774088, -1.0001488,  -1.01855057, -0.37169806, -0.74641914,-2.10024122,
#-1.25042713, -1.15339101, -0.28474766, -0.34990737, -2.13778004,
 #-4.07049444])
#trajectoryCompare(w_fold1)
# Script entry point: runs only when this file is executed directly.
if __name__ == '__main__':
	# Build the estimators once via learn_correction and hand them to
	# plotDrift. Both helpers are defined outside this section; the meaning
	# of the numeric arguments is not visible here.
	estimators = learn_correction(300,7)
	plotDrift(5,0,estimators)
#plot_reg_drift(1,1)
	# plt is presumably matplotlib.pyplot; show whatever figures are open.
	plt.show()
Esempio n. 4
0
    x = range(len(ex))

    f, axarr = plt.subplots(2, sharex=True)
    axarr[0].scatter(x, ex[:, 1], color="blue", label="data")
    axarr[0].scatter(x,
                     states_kin[:, 1],
                     color="green",
                     alpha=0.4,
                     label="kinematic")
    axarr[0].legend(bbox_to_anchor=(1., 1, 0., -0.06), loc=1)

    axarr[1].scatter(x, ex[:, 0], color="blue")
    axarr[1].scatter(x, states_kin[:, 0], color="green", alpha=0.4)

    axarr[1].set_ylabel("Angle/rad")
    axarr[0].set_ylabel("Distance")
    axarr[1].set_xlabel("Example")
    plt.show()


#trajectoryCompare()

#w_fold1 =([-1.06272686, -1.26774088, -1.0001488,  -1.01855057, -0.37169806, -0.74641914,-2.10024122,
#-1.25042713, -1.15339101, -0.28474766, -0.34990737, -2.13778004,
#-4.07049444])
#trajectoryCompare(w_fold1)
# Script entry point: runs only when this file is executed directly.
if __name__ == '__main__':
    # Build the estimators once via learn_correction and hand them to
    # plotDrift. Both helpers are defined outside this section; the meaning
    # of the numeric arguments is not visible here.
    estimators = learn_correction(300, 7)
    plotDrift(5, 0, estimators)
    #plot_reg_drift(1,1)
    # plt is presumably matplotlib.pyplot; show whatever figures are open.
    plt.show()