Example #1
 def performAction(self, action):
     """ POMDP tasks, as they have discrete actions, can me used by providing either an index,
     or an array with a 1-in-n coding (which can be stochastic). """
     if type(action) == ndarray:
         action = drawIndex(action, tolerant = True)
     self.steps += 1
     EpisodicTask.performAction(self, action)
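PyBrain's drawIndex utility turns such an array into a discrete action index. A minimal, self-contained sketch of the idea (draw_index_sketch is written for illustration and is not the library routine):

 from numpy import asarray, cumsum
 from numpy.random import random

 def draw_index_sketch(coding):
     """ Sample an index from a (possibly stochastic) 1-in-n coded array. """
     thresholds = cumsum(asarray(coding, dtype=float))
     r = random() * thresholds[-1]   # tolerate codings that do not sum to exactly 1
     for i, t in enumerate(thresholds):
         if r < t:
             return i
     return len(thresholds) - 1      # guard against floating-point rounding

 draw_index_sketch([0.0, 1.0, 0.0])  # always returns 1 (deterministic one-hot coding)
 draw_index_sketch([0.3, 0.7, 0.0])  # returns 0 or 1 with probabilities 0.3 and 0.7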
Example #2
 def performAction(self, action):
     """ POMDP tasks, as they have discrete actions, can me used by providing either an index,
     or an array with a 1-in-n coding (which can be stochastic). """
     if type(action) == ndarray:
         action = drawIndex(action, tolerant=True)
     self.steps += 1
     EpisodicTask.performAction(self, action)
Example #3
 def reset(self):
     self.reward[0] = 0.0
     self.rawReward = 0.0
     self.env.reset()
     self.action = [self.env.dists[0]] * self.outDim
     self.epiStep = 0
     EpisodicTask.reset(self)
Example #4
 def __init__(self, environment):
     EpisodicTask.__init__(self, environment)
     self.reward_history = []
     self.count = 0
     # normalize to (-1, 1)
     self.sensor_limits = [(-pi, pi), (-20, 20)]
     self.actor_limits = [(-1, 1)]
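The pairs in sensor_limits give the raw range of each sensor so that readings can be normalized to (-1, 1), as the comment says; actor_limits plays the same role for actions. A minimal sketch of the linear mapping such a limit pair implies (to_unit_range is illustrative, not a library function):

 from math import pi

 def to_unit_range(value, low, high):
     """ Linearly map a reading from [low, high] onto [-1, 1]. """
     return 2.0 * (value - low) / (high - low) - 1.0

 to_unit_range(pi / 2, -pi, pi)       # 0.5
 to_unit_range(-10.0, -20.0, 20.0)    # -0.5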
Example #5
 def reset(self):
     self.reward[0] = 0.0
     self.rawReward = 0.0
     self.env.reset()
     self.action = [self.env.dists[0]] * self.outDim
     self.epiStep = 0
     EpisodicTask.reset(self)
Example #6
 def __init__(self, environment):
     EpisodicTask.__init__(self, environment)
     self.reward_history = []
     self.count = 0
     # normalize to (-1, 1)
     self.sensor_limits = [(-pi, pi), (-20, 20)]
     self.actor_limits = [(-1, 1)]
Example #7
 def performAction(self, action):
     """ a filtered mapping towards performAction of the underlying environment. """                
     # scaling
     self.incStep()
     action=(action+1.0)/2.0*self.dif+self.env.fraktMin*self.env.dists[0]
     #Clipping the maximal change in actions (max force clipping)
     action=clip(action, self.action-self.maxSpeed, self.action+self.maxSpeed)
     EpisodicTask.performAction(self, action)
     self.action=action.copy()
Example #8
 def performAction(self, action):
     """ a filtered mapping towards performAction of the underlying environment. """
     # scaling
     self.incStep()
     action = (action +
               1.0) / 2.0 * self.dif + self.env.fraktMin * self.env.dists[0]
     #Clipping the maximal change in actions (max force clipping)
     action = clip(action, self.action - self.maxSpeed,
                   self.action + self.maxSpeed)
     EpisodicTask.performAction(self, action)
     self.action = action.copy()
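The two statements above first rescale the controller output from [-1, 1] into the environment's admissible range [fraktMin * dists[0], fraktMax * dists[0]] and then limit how far the action may move away from the previous one. A small numeric sketch with made-up stand-ins for the environment constants:

 from numpy import array, clip

 low, dif, max_speed = 2.0, 6.0, 0.2     # stand-ins for fraktMin * dists[0], self.dif and self.maxSpeed
 previous = array([3.0, 5.0, 4.0])       # action applied on the previous step

 raw = array([-1.0, 0.0, 1.0])           # controller output in [-1, 1]
 scaled = (raw + 1.0) / 2.0 * dif + low  # [2.0, 5.0, 8.0]
 limited = clip(scaled, previous - max_speed, previous + max_speed)
 # limited == [2.8, 5.0, 4.2]: each component may change by at most max_speed per step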
Example #9
 def __init__(self, env):
     EpisodicTask.__init__(self, env)
     self.step = 0
     self.epiStep = 0
     self.reward = [0.0]
     self.rawReward = 0.0
     self.obsSensors = ["EdgesReal"]
     self.rewardSensor = [""]
     self.oldReward = 0.0
     self.plotString = ["World Interactions", "Reward", "Reward on NoReward Task"]
     self.inDim = len(self.getObservation())
     self.outDim = self.env.actLen
     self.dif = (self.env.fraktMax - self.env.fraktMin) * self.env.dists[0]
     self.maxSpeed = self.dif / 30.0
     self.picCount = 0
     self.epiLen = 1
Example #10
 def __init__(self, env):
     EpisodicTask.__init__(self, env)
     self.step = 0
     self.epiStep = 0
     self.reward = [0.0]
     self.rawReward = 0.0
     self.obsSensors = ["EdgesReal"]
     self.rewardSensor = [""]
     self.oldReward = 0.0
     self.plotString = [
         "World Interactions", "Reward", "Reward on NoReward Task"
     ]
     self.inDim = len(self.getObservation())
     self.outDim = self.env.actLen
     self.dif = (self.env.fraktMax - self.env.fraktMin) * self.env.dists[0]
     self.maxSpeed = self.dif / 30.0
     self.picCount = 0
     self.epiLen = 1
Example #11
    def __init__(self, env=None, maxsteps=1000):
        """
        @param env: (optional) an instance of a ShipSteeringEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000)
        """
        if env is None:
            env = ShipSteeringEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # scale sensors
        #                          [h,              hdot,           v]
        self.sensor_limits = [(-180.0, +180.0), (-180.0, +180.0), (-10.0, +40.0)]

        # actions:              thrust,       rudder
        self.actor_limits = [(-1.0, +2.0), (-90.0, +90.0)]
        # scale reward over episode, such that max. return = 100
        self.rewardscale = 100.0 / maxsteps / self.sensor_limits[2][1]
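The last line can be checked with a quick calculation: assuming the per-step reward is bounded by the upper speed limit sensor_limits[2][1] (an assumption about the reward definition, not stated above), scaling by rewardscale makes the best possible episode return exactly 100:

    maxsteps = 1000
    v_max = 40.0                            # sensor_limits[2][1], the upper speed bound
    rewardscale = 100.0 / maxsteps / v_max  # 0.0025
    maxsteps * v_max * rewardscale          # 100.0, the maximal return per episode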
Example #12
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        self.maxPower = 100.0  # overall maximal torque - multiplied with the relative max torque of each joint to get its individual max torque
        self.reward_history = []
        self.count = 0  # timestep counter
        self.epiLen = 500  # suggested episode length for normal Johnnie tasks
        self.incLearn = 0  # counts the task resets for incremental learning
        self.env.FricMu = 20.0  # we need higher friction for Johnnie
        self.env.dt = 0.01  # we also need a finer time resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits = []
        # angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))
        # joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        # normalize all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)] * env.actLen
Example #13
    def __init__(self, env=None, maxsteps=1000):
        """
        @param env: (optional) an instance of a ShipSteeringEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000)
        """
        if env is None:
            env = ShipSteeringEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # scale sensors
        #                          [h,              hdot,           v]
        self.sensor_limits = [(-180.0, +180.0), (-180.0, +180.0),
                              (-10.0, +40.0)]

        # actions:              thrust,       rudder
        self.actor_limits = [(-1.0, +2.0), (-90.0, +90.0)]
        # scale reward over episode, such that max. return = 100
        self.rewardscale = 100. / maxsteps / self.sensor_limits[2][1]
Example #14
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        self.maxPower = 100.0  # overall maximal torque - multiplied with the relative max torque of each joint to get its individual max torque
        self.reward_history = []
        self.count = 0  # timestep counter
        self.epiLen = 500  # suggested episode length for normal Johnnie tasks
        self.incLearn = 0  # counts the task resets for incremental learning
        self.env.FricMu = 20.0  # we need higher friction for Johnnie
        self.env.dt = 0.01  # we also need a finer time resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits = []
        #Angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append(
                (self.env.cLowList[i], self.env.cHighList[i]))
        # Joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        #Norm all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)] * env.actLen
Example #15
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        # overall maximal torque - multiplied with the relative max torque of each joint.
        self.maxPower = 100.0
        self.reward_history = []
        self.count = 0  # timestep counter
        self.epiLen = 1500  # suggested episode length for normal Johnnie tasks
        self.incLearn = 0  # counts the task resets for incremental learning
        self.env.FricMu = 20.0  # we need higher friction for CCRL
        self.env.dt = 0.002  # we also need a finer time resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits = []
        #Angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append(
                (self.env.cLowList[i], self.env.cHighList[i]))
        # Joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        #Norm all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)] * env.actLen
        self.oldAction = zeros(env.actLen, float)
        self.dist = zeros(9, float)
        self.dif = array([0.0, 0.0, 0.0])
        self.target = array([-6.5, 1.75, -10.5])
        self.grepRew = 0.0
        self.tableFlag = 0.0
        self.env.addSensor(SpecificBodyPositionSensor(['objectP00'],
                                                      "glasPos"))
        self.env.addSensor(SpecificBodyPositionSensor(['palmLeft'], "palmPos"))
        self.env.addSensor(
            SpecificBodyPositionSensor(['fingerLeft1'], "finger1Pos"))
        self.env.addSensor(
            SpecificBodyPositionSensor(['fingerLeft2'], "finger2Pos"))
        # we changed the sensors, so we need to update the environment's observation length
        self.env.obsLen = len(self.env.getSensors())
        # normalization for the task-specific sensors
        for i in range(self.env.obsLen - 2 * self.env.actLen):
            self.sensor_limits.append((-4, 4))
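The final loop adds a (-4, 4) limit for every observation value beyond the 2 * actLen standard joint sensors. With hypothetical numbers, and assuming each SpecificBodyPositionSensor contributes a 3-D position, the count works out as follows:

    actLen = 7                       # hypothetical joint count
    standard = 2 * actLen            # angle sensors plus joint-velocity sensors
    positions = 4 * 3                # glasPos, palmPos, finger1Pos, finger2Pos, 3 values each (assumed)
    obsLen = standard + positions    # observation length after the addSensor calls
    obsLen - 2 * actLen              # 12 task-specific values, each normalized to (-4, 4)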
Example #16
    def __init__(self, env=None, maxsteps=1000):
        """
        @param env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000) 
        """
        if env is None:
            env = CartPoleEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # scale position and angle, don't scale velocities (unknown maximum)
        self.sensor_limits = [(-3, 3)]  # , None, (-pi, pi), None]
        for i in range(1, self.outdim):
            if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0:
                self.sensor_limits.append(None)
            else:
                self.sensor_limits.append((-pi, pi))

        # note: this overrides the limits built above and leaves all four sensors unscaled
        self.sensor_limits = [None] * 4
        # actor between -10 and 10 Newton
        self.actor_limits = [(-10, 10)]
Example #17
    def __init__(self, env=None, maxsteps=1000):
        """
        @param env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000) 
        """
        if env is None:
            env = CartPoleEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # scale position and angle, don't scale velocities (unknown maximum)
        self.sensor_limits = [(-3, 3)]  #, None, (-pi, pi), None]
        for i in range(1, self.outdim):
            if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0:
                self.sensor_limits.append(None)
            else:
                self.sensor_limits.append((-pi, pi))

        # note: this overrides the limits built above and leaves all four sensors unscaled
        self.sensor_limits = [None] * 4
        # actor between -10 and 10 Newton
        self.actor_limits = [(-10, 10)]
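Tasks like this are normally driven through an experiment rather than called directly. A sketch along the lines of PyBrain's documented cart-pole quickstart, assuming BalanceTask, buildNetwork, OptimizationAgent, HillClimber and EpisodicExperiment are available under their usual import paths:

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.environments.cartpole import BalanceTask
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import HillClimber
    from pybrain.rl.experiments import EpisodicExperiment

    task = BalanceTask()                            # defaults to a fresh CartPoleEnvironment
    net = buildNetwork(task.outdim, 3, task.indim)  # maps observations (task.outdim) to actions (task.indim)
    agent = OptimizationAgent(net, HillClimber())
    experiment = EpisodicExperiment(task, agent)
    experiment.doEpisodes(10)                       # run ten episodes of at most maxsteps steps each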
Example #18
File: ccrl.py Project: HKou/pybrain
    def __init__(self, env):
        EpisodicTask.__init__(self, env)
        # overall maximal torque - multiplied with the relative max torque of each joint.
        self.maxPower = 100.0
        self.reward_history = []
        self.count = 0  # timestep counter
        self.epiLen = 1500  # suggested episode length for normal Johnnie tasks
        self.incLearn = 0  # counts the task resets for incremental learning
        self.env.FricMu = 20.0  # we need higher friction for CCRL
        self.env.dt = 0.002  # we also need a finer time resolution

        # normalize standard sensors to (-1, 1)
        self.sensor_limits = []
        # angle sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))
        # joint velocity sensors
        for i in range(self.env.actLen):
            self.sensor_limits.append((-20, 20))
        # normalize all actor dimensions to (-1, 1)
        self.actor_limits = [(-1, 1)] * env.actLen
        self.oldAction = zeros(env.actLen, float)
        self.dist = zeros(9, float)
        self.dif = array([0.0, 0.0, 0.0])
        self.target = array([-6.5, 1.75, -10.5])
        self.grepRew = 0.0
        self.tableFlag = 0.0
        self.env.addSensor(SpecificBodyPositionSensor(['objectP00'], "glasPos"))
        self.env.addSensor(SpecificBodyPositionSensor(['palmLeft'], "palmPos"))
        self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft1'], "finger1Pos"))
        self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft2'], "finger2Pos"))
        # we changed the sensors, so we need to update the environment's observation length
        self.env.obsLen = len(self.env.getSensors())
        # normalization for the task-specific sensors
        for i in range(self.env.obsLen - 2 * self.env.actLen):
            self.sensor_limits.append((-4, 4))
Example #19
 def reset(self):
     EpisodicTask.reset(self)
     self.t = 0
Example #20
 def performAction(self, action):
     self.t += 1
     EpisodicTask.performAction(self, action)
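The counter incremented here is typically what the task's termination test checks. A sketch of that pattern (isFinished, N and t are assumed from the other examples in this listing, not from this snippet):

 def isFinished(self):
     """ End the episode once the step counter reaches the maximal length;
     real tasks usually combine this with a failure condition. """
     return self.t >= self.N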
Example #22
 def __init__(self, environment):
     EpisodicTask.__init__(self, environment)
     self.N = 15
     self.t = 0
     self.state = [0.0] * environment.dim
     self.action = [0.0] * environment.dim
Example #23
 def performAction(self, action):
     EpisodicTask.performAction(self, action)
     self.action = action
Example #24
 def getObservation(self):
     self.state = EpisodicTask.getObservation(self)
     return self.state
Example #27
 def reset(self):
     self.steps = 0
     EpisodicTask.reset(self)
Example #30
 def __init__(self):
     self.environment = SimpleraceEnvironment()
     EpisodicTask.__init__(self, self.environment)
     self.t = 0
     