def performAction(self, action): """ POMDP tasks, as they have discrete actions, can me used by providing either an index, or an array with a 1-in-n coding (which can be stochastic). """ if type(action) == ndarray: action = drawIndex(action, tolerant = True) self.steps += 1 EpisodicTask.performAction(self, action)
def performAction(self, action): action = self.action_from_joint_angles(action) # Carry out the action based on angular velocities EpisodicTask.performAction(self, action) return
def performAction(self, action): """ POMDP tasks, as they have discrete actions, can me used by providing either an index, or an array with a 1-in-n coding (which can be stochastic). """ if type(action) == ndarray: action = drawIndex(action, tolerant=True) self.steps += 1 EpisodicTask.performAction(self, action)
def performAction(self, action): action = self.action_from_joint_angles(action) # Carry out the action based on angular velocities EpisodicTask.performAction(self, action) return
def performAction(self, action): """ a filtered mapping towards performAction of the underlying environment. """ # scaling self.incStep() action = (action + 1.0) / 2.0 * self.dif + self.env.fraktMin * self.env.dists[0] #Clipping the maximal change in actions (max force clipping) action = clip(action, self.action - self.maxSpeed, self.action + self.maxSpeed) EpisodicTask.performAction(self, action) self.action = action.copy()
def performAction(self, action): #Filtered mapping towards performAction of the underlying environment #The standard Johnnie task uses a PID controller to controll directly angles instead of forces #This makes most tasks much simpler to learn isJoints=self.env.getSensorByName('JointSensor') #The joint angles isSpeeds=self.env.getSensorByName('JointVelocitySensor') #The joint angular velocitys act=(action+1.0)/2.0*(self.env.cHighList-self.env.cLowList)+self.env.cLowList #norm output to action intervall action=tanh((act-isJoints-isSpeeds)*16.0)*self.maxPower*self.env.tourqueList #simple PID EpisodicTask.performAction(self, action)
def performAction(self, action): """ a filtered mapping towards performAction of the underlying environment. """ # scaling self.incStep() action = (action + 1.0) / 2.0 * self.dif + self.env.fraktMin * self.env.dists[0] #Clipping the maximal change in actions (max force clipping) action = clip(action, self.action - self.maxSpeed, self.action + self.maxSpeed) EpisodicTask.performAction(self, action) self.action = action.copy()
def performAction(self, action): #Filtered mapping towards performAction of the underlying environment #The standard CCRL task uses a PID controller to controll directly angles instead of forces #This makes most tasks much simpler to learn self.oldAction = action #Grasping as reflex depending on the distance to target - comment in for more easy grasping if abs(abs(self.dist[:3]).sum())<2.0: action[15]=1.0 #self.grepRew=action[15]*.01 else: action[15]=-1.0 #self.grepRew=action[15]*-.03 isJoints=array(self.env.getSensorByName('JointSensor')) #The joint angles isSpeeds=array(self.env.getSensorByName('JointVelocitySensor')) #The joint angular velocitys act=(action+1.0)/2.0*(self.env.cHighList-self.env.cLowList)+self.env.cLowList #norm output to action intervall action=tanh((act-isJoints-0.9*isSpeeds*self.env.tourqueList)*16.0)*self.maxPower*self.env.tourqueList #simple PID EpisodicTask.performAction(self, action)
def performAction(self, action): #Filtered mapping towards performAction of the underlying environment #The standard Johnnie task uses a PID controller to controll directly angles instead of forces #This makes most tasks much simpler to learn isJoints = self.env.getSensorByName('JointSensor') #The joint angles isSpeeds = self.env.getSensorByName( 'JointVelocitySensor') #The joint angular velocitys act = (action + 1.0) / 2.0 * ( self.env.cHighList - self.env.cLowList ) + self.env.cLowList #norm output to action intervall action = tanh((act - isJoints - isSpeeds) * 16.0) * self.maxPower * self.env.tourqueList #simple PID EpisodicTask.performAction(self, action)
def performAction(self, action): #Filtered mapping towards performAction of the underlying environment #The standard Tetra2 task uses a PID controller to control directly angles instead of forces #This makes most tasks much simpler to learn isJoints=self.env.getSensorByName('JointSensor') #The joint angles #print "Pos:", [int(i*10) for i in isJoints] isSpeeds=self.env.getSensorByName('JointVelocitySensor') #The joint angular velocitys #print "Speeds:", [int(i*10) for i in isSpeeds] #print "Action", action, "cHighList",self.env.cHighList , self.env.cLowList #act=(action+1.0)/2.0*(self.env.cHighList-self.env.cLowList)+self.env.cLowList #norm output to action intervall #action=tanh(act-isJoints-isSpeeds)*self.maxPower*self.env.tourqueList #simple PID #print "Action", act[:5] EpisodicTask.performAction(self, action *self.maxPower*self.env.tourqueList)
def performAction(self, action): #Filtered mapping towards performAction of the underlying environment #The standard Tetra2 task uses a PID controller to control directly angles instead of forces #This makes most tasks much simpler to learn isJoints = self.env.getSensorByName('JointSensor') #The joint angles #print "Pos:", [int(i*10) for i in isJoints] isSpeeds = self.env.getSensorByName( 'JointVelocitySensor') #The joint angular velocitys #print "Speeds:", [int(i*10) for i in isSpeeds] #print "Action", action, "cHighList",self.env.cHighList , self.env.cLowList #act=(action+1.0)/2.0*(self.env.cHighList-self.env.cLowList)+self.env.cLowList #norm output to action intervall #action=tanh(act-isJoints-isSpeeds)*self.maxPower*self.env.tourqueList #simple PID #print "Action", act[:5] EpisodicTask.performAction( self, action * self.maxPower * self.env.tourqueList)
def performAction(self, action): #Filtered mapping towards performAction of the underlying environment #The standard CCRL task uses a PID controller to controll directly angles instead of forces #This makes most tasks much simpler to learn self.oldAction = action #Grasping as reflex depending on the distance to target - comment in for more easy grasping if abs(abs(self.dist[:3]).sum()) < 2.0: action[15] = 1.0 #self.grepRew=action[15]*.01 else: action[15] = -1.0 #self.grepRew=action[15]*-.03 isJoints = array( self.env.getSensorByName('JointSensor')) #The joint angles isSpeeds = array(self.env.getSensorByName( 'JointVelocitySensor')) #The joint angular velocitys act = (action + 1.0) / 2.0 * ( self.env.cHighList - self.env.cLowList ) + self.env.cLowList #norm output to action intervall action = tanh( (act - isJoints - 0.9 * isSpeeds * self.env.tourqueList) * 16.0) * self.maxPower * self.env.tourqueList #simple PID EpisodicTask.performAction(self, action)
def performAction(self, actions): self.t += 1 actions = clip(actions, -0.05, 0.05) # set speed limit EpisodicTask.performAction(self, actions)
def performAction(self, action): self.t += 1 EpisodicTask.performAction(self, action)
def performAction(self, action): EpisodicTask.performAction(self, action*self.maxPower)
def performAction(self, action): EpisodicTask.performAction(self, action) self.action = action
def performAction(self, action): EpisodicTask.performAction(self, action) self.t += self.dt self.appendLog()
def performAction(self, action): self.t += 1 EpisodicTask.performAction(self, action)
def performAction(self, action): self.oldDist = self.__getDistance() # copy current distance for shaping reward EpisodicTask.performAction(self, action) self.count += 1