def get_dataset(examples):
    """Build the training set for the kinematic-model correction.

    For every pair of consecutive states in every example, the next state
    predicted by ``staticGroupSimple2`` from the current state and action is
    compared with the next state that was actually recorded.

    Args:
        examples: Re-iterable collection (it is traversed twice) of objects
            exposing ``states`` and ``actions``. Both are sliced as
            ``[:-1, :]``, so they must be 2-D arrays with one row per step.

    Returns:
        Tuple ``(X, diff)``:
            X: rows ``[state, action]`` for every step but the last of each
               example, concatenated over all examples.
            diff: predicted next state minus recorded next state, one row
               per row of ``X``. Column 0 is replaced by ``sin(d / 2)`` and
               set to 0 wherever its magnitude exceeds 0.2.
    """
    next_kin = []
    next_data = []
    for example in examples:
        for n, step in enumerate(example.states[:-1]):
            # NOTE(review): buildTransitionFunction calls staticGroupSimple2
            # without the third argument; confirm its default agrees with
            # the 0.129 used here.
            next_kin.append(staticGroupSimple2(step, example.actions[n], 0.129))
            next_data.append(example.states[n + 1])

    diff = np.array(next_kin) - np.array(next_data)

    # Column 0 is presumably an angle: sin(d / 2) makes differences that are
    # a multiple of 2*pi apart collapse to zero -- TODO confirm.
    diff[:, 0] = np.sin(diff[:, 0] / 2)

    # Simple filter: zero out implausibly large column-0 errors.
    diff[np.absolute(diff[:, 0]) > 0.2, 0] = 0

    X = np.concatenate(
        [np.hstack((example.states[:-1, :], example.actions[:-1, :]))
         for example in examples],
        axis=0)
    return X, diff
def buildTransitionFunction(self, bin_samples, learn=True):
    """Estimate the discrete transition model by sampling and store it.

    For every (state, action) pair, ``bin_samples`` continuous quantities
    are drawn from the state's bins, propagated through
    ``staticGroupSimple2`` minus the correction predicted by the learned
    estimators, and the resulting discrete next state is counted. The
    counts are normalised over the next-state axis.

    Side effects:
        Sets ``self.transition_f`` (array indexed
        ``[action, state, next_state]``) and rebuilds ``self.transition``
        with sparse ``forward`` / ``backward`` tables. Prints the
        ``backward`` table to stdout.

    Args:
        bin_samples: Number of samples per (state, action) pair. With
            exactly 1, ``binsToQuantity`` is called with ``sample=False``;
            otherwise with ``sample=True``.
        learn: When truthy, twice the array returned by ``learn_tran()`` is
            added to the sampled counts before normalisation.

    Raises:
        ValueError: If ``bin_samples`` is less than 1. Without any sample
            the count rows would be empty and the normalisation would
            divide by zero.
    """
    # The original asserted ``bin_samples != 0`` inside the sampling loop,
    # where it could never fire; validate before doing any work instead.
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    def transition_smart():
        """Convert the dense ``transition_f`` into sparse dict tables."""
        self.transition = Transition()
        tot_states = self.transition.tot_states = transition_f.shape[1]
        tot_actions = self.transition.tot_actions = transition_f.shape[0]
        self.transition.backward = [{} for _ in range(tot_states * tot_actions)]
        self.transition.forward = [{} for _ in range(tot_states * tot_actions)]
        # Visit only the non-zero entries, in row-major order.
        for a, s, s_next in np.argwhere(transition_f):
            prob = transition_f[a, s, s_next]
            # Slot index is action + state * tot_actions; keys are the
            # string form of the state on the other side of the transition.
            self.transition.backward[a + s * tot_actions][str(s_next)] = prob
            self.transition.forward[a + s_next * tot_actions][str(s)] = prob
        # Single-argument form prints identically on Python 2 and 3.
        print(self.transition.backward)

    tot_actions = self.disc.tot_actions
    tot_states = self.disc.tot_states
    transition_f = np.zeros([tot_actions, tot_states, tot_states])

    # NOTE(review): meaning of the two 10s is defined by learn_correction,
    # which is not visible here.
    estimators = learn_correction(10, 10)

    # A single sample per bin is taken without random sampling.
    samp = bin_samples != 1

    for i in range(tot_states):
        for j in range(tot_actions):
            bins = self.disc.stateToBins(i)
            for _ in range(bin_samples):
                quantity = self.disc.binsToQuantity(bins, sample=samp)
                action = self.disc.indexToAction(j)
                correction = predict_next(quantity, action, estimators)
                # Component 0 is mapped back through 2 * arcsin(.), the
                # inverse of a sin(d / 2) encoding.
                correction[0] = np.arcsin(correction[0]) * 2
                next_quantity = staticGroupSimple2(quantity, action) - correction
                next_state = self.disc.quantityToState(next_quantity)
                transition_f[j, i, next_state] += 1

    if learn:
        # Transitions from learn_tran() are weighted twice as much as one
        # sampled count.
        transition_f = np.add(transition_f, 2 * learn_tran())

    # Normalise each (action, state) row over the next-state axis.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]

    self.transition_f = transition_f
    transition_smart()
def buildTransitionFunction(self, bin_samples, learn=True):
    """Estimate the discrete transition model by sampling and store it.

    For every (state, action) pair, ``bin_samples`` continuous quantities
    are drawn from the state's bins, propagated through
    ``staticGroupSimple2`` minus the correction predicted by the learned
    estimators, and the resulting discrete next state is counted. The
    counts are normalised over the next-state axis.

    Side effects:
        Sets ``self.transition_f`` (array indexed
        ``[action, state, next_state]``) and rebuilds ``self.transition``
        with sparse ``forward`` / ``backward`` tables. Prints the
        ``backward`` table to stdout.

    Args:
        bin_samples: Number of samples per (state, action) pair. With
            exactly 1, ``binsToQuantity`` is called with ``sample=False``;
            otherwise with ``sample=True``.
        learn: When truthy, twice the array returned by ``learn_tran()`` is
            added to the sampled counts before normalisation.

    Raises:
        ValueError: If ``bin_samples`` is less than 1. Without any sample
            the count rows would be empty and the normalisation would
            divide by zero.
    """
    # The original asserted ``bin_samples != 0`` inside the sampling loop,
    # where it could never fire; validate before doing any work instead.
    if bin_samples < 1:
        raise ValueError(
            "bin_samples must be at least 1, got %r" % (bin_samples,))

    def transition_smart():
        """Convert the dense ``transition_f`` into sparse dict tables."""
        self.transition = Transition()
        tot_states = self.transition.tot_states = transition_f.shape[1]
        tot_actions = self.transition.tot_actions = transition_f.shape[0]
        self.transition.backward = [{} for _ in range(tot_states * tot_actions)]
        self.transition.forward = [{} for _ in range(tot_states * tot_actions)]
        # Visit only the non-zero entries, in row-major order.
        for a, s, s_next in np.argwhere(transition_f):
            prob = transition_f[a, s, s_next]
            # Slot index is action + state * tot_actions; keys are the
            # string form of the state on the other side of the transition.
            self.transition.backward[a + s * tot_actions][str(s_next)] = prob
            self.transition.forward[a + s_next * tot_actions][str(s)] = prob
        # Single-argument form prints identically on Python 2 and 3.
        print(self.transition.backward)

    tot_actions = self.disc.tot_actions
    tot_states = self.disc.tot_states
    transition_f = np.zeros([tot_actions, tot_states, tot_states])

    # NOTE(review): meaning of the two 10s is defined by learn_correction,
    # which is not visible here.
    estimators = learn_correction(10, 10)

    # A single sample per bin is taken without random sampling.
    samp = bin_samples != 1

    for i in range(tot_states):
        for j in range(tot_actions):
            bins = self.disc.stateToBins(i)
            for _ in range(bin_samples):
                quantity = self.disc.binsToQuantity(bins, sample=samp)
                action = self.disc.indexToAction(j)
                correction = predict_next(quantity, action, estimators)
                # Component 0 is mapped back through 2 * arcsin(.), the
                # inverse of a sin(d / 2) encoding.
                correction[0] = np.arcsin(correction[0]) * 2
                next_quantity = staticGroupSimple2(quantity, action) - correction
                next_state = self.disc.quantityToState(next_quantity)
                transition_f[j, i, next_state] += 1

    if learn:
        # Transitions from learn_tran() are weighted twice as much as one
        # sampled count.
        transition_f = np.add(transition_f, 2 * learn_tran())

    # Normalise each (action, state) row over the next-state axis.
    row_sums = np.sum(transition_f, axis=2)
    transition_f = transition_f / row_sums[:, :, np.newaxis]

    self.transition_f = transition_f
    transition_smart()
def get_dataset(examples):
    """Build the training set for the kinematic-model correction.

    For every pair of consecutive states in every example, the next state
    predicted by ``staticGroupSimple2`` from the current state and action is
    compared with the next state that was actually recorded.

    Args:
        examples: Re-iterable collection (it is traversed twice) of objects
            exposing ``states`` and ``actions``. Both are sliced as
            ``[:-1, :]``, so they must be 2-D arrays with one row per step.

    Returns:
        Tuple ``(X, diff)``:
            X: rows ``[state, action]`` for every step but the last of each
               example, concatenated over all examples.
            diff: predicted next state minus recorded next state, one row
               per row of ``X``. Column 0 is replaced by ``sin(d / 2)`` and
               set to 0 wherever its magnitude exceeds 0.2.
    """
    next_kin = []
    next_data = []
    for example in examples:
        for n, step in enumerate(example.states[:-1]):
            # NOTE(review): buildTransitionFunction calls staticGroupSimple2
            # without the third argument; confirm its default agrees with
            # the 0.129 used here.
            next_kin.append(staticGroupSimple2(step, example.actions[n], 0.129))
            next_data.append(example.states[n + 1])

    diff = np.array(next_kin) - np.array(next_data)

    # Column 0 is presumably an angle: sin(d / 2) makes differences that are
    # a multiple of 2*pi apart collapse to zero -- TODO confirm.
    diff[:, 0] = np.sin(diff[:, 0] / 2)

    # Simple filter: zero out implausibly large column-0 errors.
    diff[np.absolute(diff[:, 0]) > 0.2, 0] = 0

    X = np.concatenate(
        [np.hstack((example.states[:-1, :], example.actions[:-1, :]))
         for example in examples],
        axis=0)
    return X, diff