def buildTransitionFunction(self, bin_samples, learn=True):
    """Estimate the discrete transition model and store it on the instance.

    For every (state, action) pair of ``self.disc`` the method draws
    ``bin_samples`` quantities from the state's bins, advances each one
    with ``staticGroupSimple2`` minus the correction returned by
    ``predict_next``, and counts the discretised successor state.  The
    counts are optionally blended with ``2 * learn_tran()``, normalised
    over the successor axis and stored as ``self.transition_f`` (indexed
    ``[action, state, next_state]``).  A dict-based sparse copy is stored
    as ``self.transition``.

    Args:
        bin_samples: Number of samples drawn per (state, action) pair.
            Must be at least 1.  With exactly 1, ``binsToQuantity`` is
            called with ``sample=False``; otherwise with ``sample=True``.
        learn: When truthy, twice the array returned by ``learn_tran()``
            is added to the counts before normalising.

    Raises:
        ValueError: If ``bin_samples`` is smaller than 1.
    """
    # Validate up front: with no samples the sampling loop body never
    # runs, so rows could sum to zero and normalise to NaN.
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    def transition_smart(dense):
        """Store a sparse, dict-based copy of ``dense`` in ``self.transition``."""
        sparse = self.transition = Transition()
        tot_states = sparse.tot_states = dense.shape[1]
        tot_actions = sparse.tot_actions = dense.shape[0]
        n_slots = tot_states * tot_actions
        sparse.backward = [{} for _ in xrange(n_slots)]
        sparse.forward = [{} for _ in xrange(n_slots)]
        # One row per non-zero entry: (action, state, next_state).
        for a, s, s_next in np.transpose(np.array(np.nonzero(dense))):
            prob = dense[a, s, s_next]
            # Slot = action + state * tot_actions, keyed by the other state.
            sparse.backward[a + s * tot_actions][str(s_next)] = prob
            sparse.forward[a + s_next * tot_actions][str(s)] = prob
        # Debug dump kept from the original implementation.
        print(sparse.backward)

    n_actions = self.disc.tot_actions
    n_states = self.disc.tot_states
    transition_f = np.zeros([n_actions, n_states, n_states])
    estimators = learn_correction(10, 10)

    # A single sample per bin disables sampling in binsToQuantity.
    samp = bin_samples != 1

    for i in xrange(n_states):
        for j in xrange(n_actions):
            bins = self.disc.stateToBins(i)
            for _ in xrange(bin_samples):
                quantity = self.disc.binsToQuantity(bins, sample=samp)
                action = self.disc.indexToAction(j)
                correction = predict_next(quantity, action, estimators)
                # NOTE(review): presumably the estimator predicts
                # sin(angle / 2) for the first component -- confirm.
                correction[0] = np.arcsin(correction[0]) * 2
                next_quantity = (
                    staticGroupSimple2(quantity, action) - correction)
                next_state = self.disc.quantityToState(next_quantity)
                transition_f[j, i, next_state] += 1

    if learn:
        # Learned transitions get twice the weight of one sampled count.
        transition_f = np.add(transition_f, 2 * learn_tran())

    # Normalise counts over the successor-state axis.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]
    self.transition_f = transition_f
    transition_smart(transition_f)
def buildTransitionFunction(self, bin_samples, learn=True):
    """Estimate the discrete transition model and store it on the instance.

    For every (state, action) pair of ``self.disc`` the method draws
    ``bin_samples`` quantities from the state's bins, advances each one
    with ``staticGroupSimple2`` minus the correction returned by
    ``predict_next``, and counts the discretised successor state.  The
    counts are optionally blended with ``2 * learn_tran()``, normalised
    over the successor axis and stored as ``self.transition_f`` (indexed
    ``[action, state, next_state]``).  A dict-based sparse copy is stored
    as ``self.transition``.

    Args:
        bin_samples: Number of samples drawn per (state, action) pair.
            Must be at least 1.  With exactly 1, ``binsToQuantity`` is
            called with ``sample=False``; otherwise with ``sample=True``.
        learn: When truthy, twice the array returned by ``learn_tran()``
            is added to the counts before normalising.

    Raises:
        ValueError: If ``bin_samples`` is smaller than 1.
    """
    # Validate up front: with no samples the sampling loop body never
    # runs, so rows could sum to zero and normalise to NaN.
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    def transition_smart(dense):
        """Store a sparse, dict-based copy of ``dense`` in ``self.transition``."""
        sparse = self.transition = Transition()
        tot_states = sparse.tot_states = dense.shape[1]
        tot_actions = sparse.tot_actions = dense.shape[0]
        n_slots = tot_states * tot_actions
        sparse.backward = [{} for _ in xrange(n_slots)]
        sparse.forward = [{} for _ in xrange(n_slots)]
        # One row per non-zero entry: (action, state, next_state).
        for a, s, s_next in np.transpose(np.array(np.nonzero(dense))):
            prob = dense[a, s, s_next]
            # Slot = action + state * tot_actions, keyed by the other state.
            sparse.backward[a + s * tot_actions][str(s_next)] = prob
            sparse.forward[a + s_next * tot_actions][str(s)] = prob
        # Debug dump kept from the original implementation.
        print(sparse.backward)

    n_actions = self.disc.tot_actions
    n_states = self.disc.tot_states
    transition_f = np.zeros([n_actions, n_states, n_states])
    estimators = learn_correction(10, 10)

    # A single sample per bin disables sampling in binsToQuantity.
    samp = bin_samples != 1

    for i in xrange(n_states):
        for j in xrange(n_actions):
            bins = self.disc.stateToBins(i)
            for _ in xrange(bin_samples):
                quantity = self.disc.binsToQuantity(bins, sample=samp)
                action = self.disc.indexToAction(j)
                correction = predict_next(quantity, action, estimators)
                # NOTE(review): presumably the estimator predicts
                # sin(angle / 2) for the first component -- confirm.
                correction[0] = np.arcsin(correction[0]) * 2
                next_quantity = (
                    staticGroupSimple2(quantity, action) - correction)
                next_state = self.disc.quantityToState(next_quantity)
                transition_f[j, i, next_state] += 1

    if learn:
        # Learned transitions get twice the weight of one sampled count.
        transition_f = np.add(transition_f, 2 * learn_tran())

    # Normalise counts over the successor-state axis.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]
    self.transition_f = transition_f
    transition_smart(transition_f)
else: print example.actions[n] states_kin = np.vstack([states_kin,predict_next(states_kin[n,:],example.actions[n],estimators)]) ex = np.array(example.states[startpoint::]) x = range(len(ex)) f, axarr = plt.subplots(2,sharex = True) axarr[0].scatter(x,ex[:,1],color = "blue",label = "data") axarr[0].scatter(x,states_kin[:,1],color = "green",alpha = 0.4, label = "kinematic") axarr[0].legend(bbox_to_anchor=(1., 1,0.,-0.06),loc=1) axarr[1].scatter(x,ex[:,0],color = "blue") axarr[1].scatter(x,states_kin[:,0],color = "green", alpha = 0.4) axarr[1].set_ylabel("Angle/rad") axarr[0].set_ylabel("Distance") axarr[1].set_xlabel("Example") plt.show() #trajectoryCompare() #w_fold1 =([-1.06272686, -1.26774088, -1.0001488, -1.01855057, -0.37169806, -0.74641914,-2.10024122, #-1.25042713, -1.15339101, -0.28474766, -0.34990737, -2.13778004, #-4.07049444]) #trajectoryCompare(w_fold1) if __name__ == '__main__': estimators = learn_correction(300,7) plotDrift(5,0,estimators) #plot_reg_drift(1,1) plt.show()
x = range(len(ex)) f, axarr = plt.subplots(2, sharex=True) axarr[0].scatter(x, ex[:, 1], color="blue", label="data") axarr[0].scatter(x, states_kin[:, 1], color="green", alpha=0.4, label="kinematic") axarr[0].legend(bbox_to_anchor=(1., 1, 0., -0.06), loc=1) axarr[1].scatter(x, ex[:, 0], color="blue") axarr[1].scatter(x, states_kin[:, 0], color="green", alpha=0.4) axarr[1].set_ylabel("Angle/rad") axarr[0].set_ylabel("Distance") axarr[1].set_xlabel("Example") plt.show() #trajectoryCompare() #w_fold1 =([-1.06272686, -1.26774088, -1.0001488, -1.01855057, -0.37169806, -0.74641914,-2.10024122, #-1.25042713, -1.15339101, -0.28474766, -0.34990737, -2.13778004, #-4.07049444]) #trajectoryCompare(w_fold1) if __name__ == '__main__': estimators = learn_correction(300, 7) plotDrift(5, 0, estimators) #plot_reg_drift(1,1) plt.show()