def performAction(self, action):
     """ POMDP tasks, as they have discrete actions, can me used by providing either an index,
     or an array with a 1-in-n coding (which can be stochastic). """
     if type(action) == ndarray:
         action = drawIndex(action, tolerant = True)
     self.steps += 1
     EpisodicTask.performAction(self, action)
Beispiel #2
0
 def performAction(self, action):
     #Filtered mapping towards performAction of the underlying environment   
     #The standard Johnnie task uses a PID controller to controll directly angles instead of forces
     #This makes most tasks much simpler to learn             
     isJoints=self.env.getSensorByName('JointSensor') #The joint angles
     isSpeeds=self.env.getSensorByName('JointVelocitySensor') #The joint angular velocitys
     act=(action+1.0)/2.0*(self.env.cHighList-self.env.cLowList)+self.env.cLowList #norm output to action intervall  
     action=tanh((act-isJoints-isSpeeds)*16.0)*self.maxPower*self.env.tourqueList #simple PID
     EpisodicTask.performAction(self, action)
Beispiel #3
0
 def performAction(self, action):
     """ a filtered mapping towards performAction of the underlying environment. """                
     # scaling
     self.incStep()
     action = (action + 1.0) / 2.0 * self.dif + self.env.fraktMin * self.env.dists[0]
     #Clipping the maximal change in actions (max force clipping)
     action = clip(action, self.action - self.maxSpeed, self.action + self.maxSpeed)
     EpisodicTask.performAction(self, action)
     self.action = action.copy()
 def performAction(self, action):
     #Filtered mapping towards performAction of the underlying environment   
     #The standard CCRL task uses a PID controller to controll directly angles instead of forces
     #This makes most tasks much simpler to learn             
     self.oldAction = action
     #Grasping as reflex depending on the distance to target - comment in for more easy grasping
     #if abs(self.dist[2])<2.0: action[15]=(1.0+2.0*action[15])*.3333 #self.grepRew=action[15]*.01
     #else: action[15]=(-1.0+2.0*action[15])*.3333 #self.grepRew=action[15]*-.03
     isJoints=array(self.env.getSensorByName('JointSensor')) #The joint angles
     isSpeeds=array(self.env.getSensorByName('JointVelocitySensor')) #The joint angular velocitys
     act=(action+1.0)/2.0*(self.env.cHighList-self.env.cLowList)+self.env.cLowList #norm output to action intervall  
     action=tanh((act-isJoints-0.9*isSpeeds*self.env.tourqueList)*16.0)*self.maxPower*self.env.tourqueList #simple PID
     EpisodicTask.performAction(self, action)
 def performAction(self, action):
     self.t += 1
     EpisodicTask.performAction(self, action)
Beispiel #6
0
 def performAction(self, action):
     EpisodicTask.performAction(self, action)
     self.action = action