def plotDrift(idx, startpoint, estimators):
    """Plot recorded states against two open-loop kinematic roll-outs.

    For each of the first ``idx`` examples returned by
    ``extract_info(DiscModel(), "Full", "good")`` two trajectories are rolled
    out starting from ``example.states[startpoint]``:

    * ``states_kin``  -- ``staticGroupSimple2`` step minus the correction
      returned by ``predict_next`` (green, labelled "kinematic");
    * ``states_kin2`` -- plain ``staticGroupSimple2`` step (red, labelled
      "kinematic2").

    Both are scattered over the recorded states (blue, labelled "data") on a
    new two-row figure per example: state column 1 on top ("Distance") and
    state column 0 below ("Angle/rad").  ``plt.show()`` is not called here.

    Args:
        idx: Number of examples to plot (``examples[0:idx]``).
        startpoint: Index into ``example.states`` / ``example.actions`` at
            which the roll-out starts.
        estimators: Passed through unchanged to ``predict_next``.
    """
    m = DiscModel()
    examples = extract_info(m, "Full", "good")
    for example in examples[0:idx]:
        # Keep both roll-outs 2-D from the start so that the most recent
        # state is always the last row, including before the first step.
        states_kin = np.atleast_2d(np.array(example.states[startpoint]))
        states_kin2 = np.atleast_2d(np.array(example.states[startpoint]))

        # ``action`` is example.actions[startpoint + n]: the action that
        # belongs to the n-th state of the roll-out.  The same action drives
        # the kinematic step and its correction.
        for n, action in enumerate(example.actions[startpoint:-1]):
            jumped = n > 0 and np.absolute(
                example.states[startpoint + n][0]
                - example.states[startpoint + n - 1][0]) > 0.1
            if jumped:
                # Component 0 of the recorded state moved by more than 0.1
                # between consecutive samples: re-seed both roll-outs from
                # the recording instead of predicting across the jump.
                next_kin = example.states[startpoint + n]
                next_kin2 = example.states[startpoint + n]
            else:
                current = states_kin[-1]
                nex = predict_next(current, action, estimators)
                # presumably the estimator predicts sin(angle / 2) for
                # component 0 -- TODO confirm against learn_correction
                nex[0] = np.arcsin(nex[0]) * 2
                next_kin = staticGroupSimple2(current, action) - nex
                next_kin2 = staticGroupSimple2(states_kin2[-1], action)
            states_kin = np.vstack([states_kin, next_kin])
            states_kin2 = np.vstack([states_kin2, next_kin2])

        ex = np.array(example.states[startpoint:])
        x = range(len(ex))
        _, axarr = plt.subplots(2, sharex=True)

        axarr[0].scatter(x, ex[:, 1], color="blue", label="data")
        axarr[0].scatter(x, states_kin[:, 1], color="green", alpha=0.4,
                         label="kinematic")
        axarr[0].scatter(x, states_kin2[:, 1], color="red", alpha=0.2,
                         label="kinematic2")
        axarr[0].legend(bbox_to_anchor=(1., 1, 0., -0.06), loc=1)

        axarr[1].scatter(x, ex[:, 0], color="blue")
        axarr[1].scatter(x, states_kin[:, 0], color="green", alpha=0.4)
        axarr[1].scatter(x, states_kin2[:, 0], color="red", alpha=0.2)

        axarr[1].set_ylabel("Angle/rad")
        axarr[0].set_ylabel("Distance")
        axarr[1].set_xlabel("Example")
def plot_reg_drift(idx, startpoint):
    """Plot one recorded example against a regression-based roll-out.

    Takes example number ``idx`` from
    ``extract_info(DiscModel(), "Full", "good")``, fits estimators with
    ``learn_tran_regression(300, 4)`` and rolls a trajectory out from
    ``example.states[startpoint]``.  The first step uses
    ``staticGroupSimple2``; later steps use ``predict_next`` with the fitted
    estimators.  The roll-out (green, labelled "kinematic") is scattered over
    the recorded states (blue, labelled "data") on a two-row figure: state
    column 1 on top ("Distance"), state column 0 below ("Angle/rad").  The
    figure is shown with ``plt.show()``.

    Args:
        idx: Index of the example to plot.
        startpoint: Index into ``example.states`` / ``example.actions`` at
            which the roll-out starts.
    """
    m = DiscModel()
    examples = extract_info(m, "Full", "good")
    example = examples[idx]
    estimators = learn_tran_regression(300, 4)

    states_kin = np.array(example.states[startpoint])
    # ``action`` is example.actions[startpoint + n], i.e. the action that
    # belongs to the n-th state of the roll-out.
    for n, action in enumerate(example.actions[startpoint:-1]):
        if n == 0:
            # states_kin is still the single 1-D start state here.
            next_state = staticGroupSimple2(states_kin, action)
        elif np.absolute(example.states[startpoint + n][0]
                         - example.states[startpoint + n - 1][0]) > 0.1:
            # Component 0 of the recorded state moved by more than 0.1
            # between consecutive samples: re-seed from the recording.
            next_state = example.states[startpoint + n]
        else:
            next_state = predict_next(states_kin[n, :], action, estimators)
        states_kin = np.vstack([states_kin, next_state])

    ex = np.array(example.states[startpoint:])
    x = range(len(ex))
    _, axarr = plt.subplots(2, sharex=True)

    axarr[0].scatter(x, ex[:, 1], color="blue", label="data")
    axarr[0].scatter(x, states_kin[:, 1], color="green", alpha=0.4,
                     label="kinematic")
    axarr[0].legend(bbox_to_anchor=(1., 1, 0., -0.06), loc=1)

    axarr[1].scatter(x, ex[:, 0], color="blue")
    axarr[1].scatter(x, states_kin[:, 0], color="green", alpha=0.4)

    axarr[1].set_ylabel("Angle/rad")
    axarr[0].set_ylabel("Distance")
    axarr[1].set_xlabel("Example")
    plt.show()
def buildTransitionFunction(self, bin_samples, learn=True):
    """Estimate the transition model by sampling and store it on ``self``.

    For every (state, action) pair, ``bin_samples`` quantities are taken from
    the state's bins, pushed through ``staticGroupSimple2`` minus the
    correction from ``predict_next``, and the resulting state is counted in
    ``transition_f[action, state, next_state]``.  When ``learn`` is truthy,
    twice the array returned by ``learn_tran()`` is added to the counts.
    The counts are then normalised so each ``[action, state, :]`` row sums
    to one.

    Sets:
        self.transition_f: The dense normalised array.
        self.transition: A ``Transition`` with ``tot_states``,
            ``tot_actions`` and two lists of dicts built from the non-zero
            entries ``p = transition_f[a, s, s2]``:
            ``backward[a + s * tot_actions][str(s2)] = p`` and
            ``forward[a + s2 * tot_actions][str(s)] = p``.

    Args:
        bin_samples: Samples drawn per (state, action) pair; must be >= 1.
        learn: When truthy, blend in the counts from ``learn_tran()``.

    Raises:
        ValueError: If ``bin_samples`` is smaller than 1 (no counts would be
            collected, leaving rows that cannot be normalised).
    """
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    disc = self.disc
    transition_f = np.zeros(
        [disc.tot_actions, disc.tot_states, disc.tot_states])
    estimators = learn_correction(10, 10)

    # binsToQuantity is asked to sample (sample=True) only when more than
    # one sample per bin is requested.
    sample_within_bin = bin_samples != 1

    # NOTE(review): stateToBins / indexToAction are hoisted out of the inner
    # loops on the assumption that they are pure lookups -- confirm.
    for state in range(disc.tot_states):
        bins = disc.stateToBins(state)
        for action_idx in range(disc.tot_actions):
            action = disc.indexToAction(action_idx)
            for _ in range(bin_samples):
                quantity = disc.binsToQuantity(bins, sample=sample_within_bin)
                correction = predict_next(quantity, action, estimators)
                # presumably the estimator predicts sin(angle / 2) for
                # component 0 -- TODO confirm against learn_correction
                correction[0] = np.arcsin(correction[0]) * 2
                next_quantity = (
                    staticGroupSimple2(quantity, action) - correction)
                next_state = disc.quantityToState(next_quantity)
                transition_f[action_idx, state, next_state] += 1

    if learn:
        trans = learn_tran()
        transition_f = np.add(transition_f, 2 * trans)

    # Normalise over the next-state axis.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]
    self.transition_f = transition_f

    # Sparse views of the dense array, one dict per (action, state) slot.
    self.transition = Transition()
    tot_states = self.transition.tot_states = transition_f.shape[1]
    tot_actions = self.transition.tot_actions = transition_f.shape[0]
    self.transition.backward = [{} for _ in range(tot_states * tot_actions)]
    self.transition.forward = [{} for _ in range(tot_states * tot_actions)]
    for a, s, s_next in np.argwhere(transition_f):
        prob = transition_f[a, s, s_next]
        self.transition.backward[a + s * tot_actions][str(s_next)] = prob
        self.transition.forward[a + s_next * tot_actions][str(s)] = prob
def plotDrift(idx, startpoint, estimators):
    """Plot recorded states against two open-loop kinematic roll-outs.

    For each of the first ``idx`` examples returned by
    ``extract_info(DiscModel(), "Full", "good")`` two trajectories are rolled
    out starting from ``example.states[startpoint]``:

    * ``states_kin``  -- ``staticGroupSimple2`` step minus the correction
      returned by ``predict_next`` (green, labelled "kinematic");
    * ``states_kin2`` -- plain ``staticGroupSimple2`` step (red, labelled
      "kinematic2").

    Both are scattered over the recorded states (blue, labelled "data") on a
    new two-row figure per example: state column 1 on top ("Distance") and
    state column 0 below ("Angle/rad").  ``plt.show()`` is not called here.

    Args:
        idx: Number of examples to plot (``examples[0:idx]``).
        startpoint: Index into ``example.states`` / ``example.actions`` at
            which the roll-out starts.
        estimators: Passed through unchanged to ``predict_next``.
    """
    m = DiscModel()
    examples = extract_info(m, "Full", "good")
    for example in examples[0:idx]:
        # Keep both roll-outs 2-D from the start so that the most recent
        # state is always the last row, including before the first step.
        states_kin = np.atleast_2d(np.array(example.states[startpoint]))
        states_kin2 = np.atleast_2d(np.array(example.states[startpoint]))

        # ``action`` is example.actions[startpoint + n]: the action that
        # belongs to the n-th state of the roll-out.  The same action drives
        # the kinematic step and its correction.
        for n, action in enumerate(example.actions[startpoint:-1]):
            jumped = n > 0 and np.absolute(
                example.states[startpoint + n][0]
                - example.states[startpoint + n - 1][0]) > 0.1
            if jumped:
                # Component 0 of the recorded state moved by more than 0.1
                # between consecutive samples: re-seed both roll-outs from
                # the recording instead of predicting across the jump.
                next_kin = example.states[startpoint + n]
                next_kin2 = example.states[startpoint + n]
            else:
                current = states_kin[-1]
                nex = predict_next(current, action, estimators)
                # presumably the estimator predicts sin(angle / 2) for
                # component 0 -- TODO confirm against learn_correction
                nex[0] = np.arcsin(nex[0]) * 2
                next_kin = staticGroupSimple2(current, action) - nex
                next_kin2 = staticGroupSimple2(states_kin2[-1], action)
            states_kin = np.vstack([states_kin, next_kin])
            states_kin2 = np.vstack([states_kin2, next_kin2])

        ex = np.array(example.states[startpoint:])
        x = range(len(ex))
        _, axarr = plt.subplots(2, sharex=True)

        axarr[0].scatter(x, ex[:, 1], color="blue", label="data")
        axarr[0].scatter(x, states_kin[:, 1], color="green", alpha=0.4,
                         label="kinematic")
        axarr[0].scatter(x, states_kin2[:, 1], color="red", alpha=0.2,
                         label="kinematic2")
        axarr[0].legend(bbox_to_anchor=(1., 1, 0., -0.06), loc=1)

        axarr[1].scatter(x, ex[:, 0], color="blue")
        axarr[1].scatter(x, states_kin[:, 0], color="green", alpha=0.4)
        axarr[1].scatter(x, states_kin2[:, 0], color="red", alpha=0.2)

        axarr[1].set_ylabel("Angle/rad")
        axarr[0].set_ylabel("Distance")
        axarr[1].set_xlabel("Example")
def buildTransitionFunction(self, bin_samples, learn=True):
    """Estimate the transition model by sampling and store it on ``self``.

    For every (state, action) pair, ``bin_samples`` quantities are taken from
    the state's bins, pushed through ``staticGroupSimple2`` minus the
    correction from ``predict_next``, and the resulting state is counted in
    ``transition_f[action, state, next_state]``.  When ``learn`` is truthy,
    twice the array returned by ``learn_tran()`` is added to the counts.
    The counts are then normalised so each ``[action, state, :]`` row sums
    to one.

    Sets:
        self.transition_f: The dense normalised array.
        self.transition: A ``Transition`` with ``tot_states``,
            ``tot_actions`` and two lists of dicts built from the non-zero
            entries ``p = transition_f[a, s, s2]``:
            ``backward[a + s * tot_actions][str(s2)] = p`` and
            ``forward[a + s2 * tot_actions][str(s)] = p``.

    Args:
        bin_samples: Samples drawn per (state, action) pair; must be >= 1.
        learn: When truthy, blend in the counts from ``learn_tran()``.

    Raises:
        ValueError: If ``bin_samples`` is smaller than 1 (no counts would be
            collected, leaving rows that cannot be normalised).
    """
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    disc = self.disc
    transition_f = np.zeros(
        [disc.tot_actions, disc.tot_states, disc.tot_states])
    estimators = learn_correction(10, 10)

    # binsToQuantity is asked to sample (sample=True) only when more than
    # one sample per bin is requested.
    sample_within_bin = bin_samples != 1

    # NOTE(review): stateToBins / indexToAction are hoisted out of the inner
    # loops on the assumption that they are pure lookups -- confirm.
    for state in range(disc.tot_states):
        bins = disc.stateToBins(state)
        for action_idx in range(disc.tot_actions):
            action = disc.indexToAction(action_idx)
            for _ in range(bin_samples):
                quantity = disc.binsToQuantity(bins, sample=sample_within_bin)
                correction = predict_next(quantity, action, estimators)
                # presumably the estimator predicts sin(angle / 2) for
                # component 0 -- TODO confirm against learn_correction
                correction[0] = np.arcsin(correction[0]) * 2
                next_quantity = (
                    staticGroupSimple2(quantity, action) - correction)
                next_state = disc.quantityToState(next_quantity)
                transition_f[action_idx, state, next_state] += 1

    if learn:
        trans = learn_tran()
        transition_f = np.add(transition_f, 2 * trans)

    # Normalise over the next-state axis.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]
    self.transition_f = transition_f

    # Sparse views of the dense array, one dict per (action, state) slot.
    self.transition = Transition()
    tot_states = self.transition.tot_states = transition_f.shape[1]
    tot_actions = self.transition.tot_actions = transition_f.shape[0]
    self.transition.backward = [{} for _ in range(tot_states * tot_actions)]
    self.transition.forward = [{} for _ in range(tot_states * tot_actions)]
    for a, s, s_next in np.argwhere(transition_f):
        prob = transition_f[a, s, s_next]
        self.transition.backward[a + s * tot_actions][str(s_next)] = prob
        self.transition.forward[a + s_next * tot_actions][str(s)] = prob
def plot_reg_drift(idx, startpoint):
    """Plot one recorded example against a regression-based roll-out.

    Takes example number ``idx`` from
    ``extract_info(DiscModel(), "Full", "good")``, fits estimators with
    ``learn_tran_regression(300, 4)`` and rolls a trajectory out from
    ``example.states[startpoint]``.  The first step uses
    ``staticGroupSimple2``; later steps use ``predict_next`` with the fitted
    estimators.  The roll-out (green, labelled "kinematic") is scattered over
    the recorded states (blue, labelled "data") on a two-row figure: state
    column 1 on top ("Distance"), state column 0 below ("Angle/rad").  The
    figure is shown with ``plt.show()``.

    Args:
        idx: Index of the example to plot.
        startpoint: Index into ``example.states`` / ``example.actions`` at
            which the roll-out starts.
    """
    m = DiscModel()
    examples = extract_info(m, "Full", "good")
    example = examples[idx]
    estimators = learn_tran_regression(300, 4)

    states_kin = np.array(example.states[startpoint])
    # ``action`` is example.actions[startpoint + n], i.e. the action that
    # belongs to the n-th state of the roll-out.
    for n, action in enumerate(example.actions[startpoint:-1]):
        if n == 0:
            # states_kin is still the single 1-D start state here.
            next_state = staticGroupSimple2(states_kin, action)
        elif np.absolute(example.states[startpoint + n][0]
                         - example.states[startpoint + n - 1][0]) > 0.1:
            # Component 0 of the recorded state moved by more than 0.1
            # between consecutive samples: re-seed from the recording.
            next_state = example.states[startpoint + n]
        else:
            next_state = predict_next(states_kin[n, :], action, estimators)
        states_kin = np.vstack([states_kin, next_state])

    ex = np.array(example.states[startpoint:])
    x = range(len(ex))
    _, axarr = plt.subplots(2, sharex=True)

    axarr[0].scatter(x, ex[:, 1], color="blue", label="data")
    axarr[0].scatter(x, states_kin[:, 1], color="green", alpha=0.4,
                     label="kinematic")
    axarr[0].legend(bbox_to_anchor=(1., 1, 0., -0.06), loc=1)

    axarr[1].scatter(x, ex[:, 0], color="blue")
    axarr[1].scatter(x, states_kin[:, 0], color="green", alpha=0.4)

    axarr[1].set_ylabel("Angle/rad")
    axarr[0].set_ylabel("Distance")
    axarr[1].set_xlabel("Example")
    plt.show()