def performAction(self, action): """ POMDP tasks, as they have discrete actions, can me used by providing either an index, or an array with a 1-in-n coding (which can be stochastic). """ if type(action) == ndarray: action = drawIndex(action, tolerant = True) self.steps += 1 EpisodicTask.performAction(self, action)
def performAction(self, action): """ POMDP tasks, as they have discrete actions, can me used by providing either an index, or an array with a 1-in-n coding (which can be stochastic). """ if type(action) == ndarray: action = drawIndex(action, tolerant=True) self.steps += 1 EpisodicTask.performAction(self, action)
def reset(self):
    self.reward[0] = 0.0
    self.rawReward = 0.0
    self.env.reset()
    self.action = [self.env.dists[0]] * self.outDim
    self.epiStep = 0
    EpisodicTask.reset(self)
def __init__(self, environment):
    EpisodicTask.__init__(self, environment)
    self.reward_history = []
    self.count = 0
    # normalize to (-1, 1)
    self.sensor_limits = [(-pi, pi), (-20, 20)]
    self.actor_limits = [(-1, 1)]
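
# --- Minimal sketch (assumption: the base task scales each sensor channel
# linearly from its (low, high) limit pair into (-1, 1), and a limit of None
# leaves the channel unscaled). This mirrors what the sensor_limits above
# request; the hypothetical helper below is for illustration only.
from math import pi

def normalize(value, low, high):
    return (value - low) / (high - low) * 2.0 - 1.0

assert normalize(0.0, -pi, pi) == 0.0        # mid-range maps to 0
assert normalize(20.0, -20.0, 20.0) == 1.0   # upper limit maps to +1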
def performAction(self, action): """ a filtered mapping towards performAction of the underlying environment. """ # scaling self.incStep() action=(action+1.0)/2.0*self.dif+self.env.fraktMin*self.env.dists[0] #Clipping the maximal change in actions (max force clipping) action=clip(action, self.action-self.maxSpeed, self.action+self.maxSpeed) EpisodicTask.performAction(self, action) self.action=action.copy()
def performAction(self, action): """ a filtered mapping towards performAction of the underlying environment. """ # scaling self.incStep() action = (action + 1.0) / 2.0 * self.dif + self.env.fraktMin * self.env.dists[0] #Clipping the maximal change in actions (max force clipping) action = clip(action, self.action - self.maxSpeed, self.action + self.maxSpeed) EpisodicTask.performAction(self, action) self.action = action.copy()
def __init__(self, env):
    EpisodicTask.__init__(self, env)
    self.step = 0
    self.epiStep = 0
    self.reward = [0.0]
    self.rawReward = 0.0
    self.obsSensors = ["EdgesReal"]
    self.rewardSensor = [""]
    self.oldReward = 0.0
    self.plotString = ["World Interactions", "Reward",
                       "Reward on NoReward Task"]
    self.inDim = len(self.getObservation())
    self.outDim = self.env.actLen
    self.dif = (self.env.fraktMax - self.env.fraktMin) * self.env.dists[0]
    self.maxSpeed = self.dif / 30.0
    self.picCount = 0
    self.epiLen = 1
def __init__(self, env=None, maxsteps=1000):
    """ @param env: (optional) an instance of a ShipSteeringEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000)
    """
    if env is None:
        env = ShipSteeringEnvironment()
    EpisodicTask.__init__(self, env)
    self.N = maxsteps
    self.t = 0
    # scale sensors: [h, hdot, v]
    self.sensor_limits = [(-180.0, +180.0), (-180.0, +180.0), (-10.0, +40.0)]
    # actions: thrust, rudder
    self.actor_limits = [(-1.0, +2.0), (-90.0, +90.0)]
    # scale reward over the episode, such that the maximal return is 100
    self.rewardscale = 100.0 / maxsteps / self.sensor_limits[2][1]
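
# --- Arithmetic sketch (not from the original source): why this scaling caps
# the return at 100. With maxsteps = 1000 and a top speed of 40 (the upper
# sensor limit for v), even a full episode at maximum speed sums to 100.
maxsteps, vmax = 1000, 40.0
rewardscale = 100.0 / maxsteps / vmax            # 0.0025
assert maxsteps * vmax * rewardscale == 100.0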
def __init__(self, env):
    EpisodicTask.__init__(self, env)
    # overall maximal torque - multiplied with each joint's relative maximal
    # torque to get the individual joint limit
    self.maxPower = 100.0
    self.reward_history = []
    self.count = 0  # timestep counter
    self.epiLen = 500  # suggested episode length for normal Johnnie tasks
    self.incLearn = 0  # counts the task resets for incremental learning
    self.env.FricMu = 20.0  # we need higher friction for Johnnie
    self.env.dt = 0.01  # we also need finer temporal resolution
    # normalize standard sensors to (-1, 1)
    self.sensor_limits = []
    # angle sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))
    # joint velocity sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((-20, 20))
    # normalize all actor dimensions to (-1, 1)
    self.actor_limits = [(-1, 1)] * env.actLen
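
# --- Hedged sketch (values and the relMaxTorque name are hypothetical): how a
# per-joint torque limit would be derived from maxPower, per the comment above.
maxPower = 100.0
relMaxTorque = [1.0, 0.5, 0.25]                       # hypothetical factors
jointTorques = [maxPower * r for r in relMaxTorque]   # [100.0, 50.0, 25.0]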
def __init__(self, env):
    EpisodicTask.__init__(self, env)
    # overall maximal torque - multiplied with each joint's relative maximal
    # torque to get the individual joint limit
    self.maxPower = 100.0
    self.reward_history = []
    self.count = 0  # timestep counter
    self.epiLen = 1500  # suggested episode length for normal CCRL tasks
    self.incLearn = 0  # counts the task resets for incremental learning
    self.env.FricMu = 20.0  # we need higher friction for CCRL
    self.env.dt = 0.002  # we also need finer temporal resolution
    # normalize standard sensors to (-1, 1)
    self.sensor_limits = []
    # angle sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((self.env.cLowList[i], self.env.cHighList[i]))
    # joint velocity sensors
    for i in range(self.env.actLen):
        self.sensor_limits.append((-20, 20))
    # normalize all actor dimensions to (-1, 1)
    self.actor_limits = [(-1, 1)] * env.actLen
    self.oldAction = zeros(env.actLen, float)
    self.dist = zeros(9, float)
    self.dif = array([0.0, 0.0, 0.0])
    self.target = array([-6.5, 1.75, -10.5])
    self.grepRew = 0.0
    self.tableFlag = 0.0
    self.env.addSensor(SpecificBodyPositionSensor(['objectP00'], "glasPos"))
    self.env.addSensor(SpecificBodyPositionSensor(['palmLeft'], "palmPos"))
    self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft1'], "finger1Pos"))
    self.env.addSensor(SpecificBodyPositionSensor(['fingerLeft2'], "finger2Pos"))
    # we changed the sensors, so the environment's obsLen must be updated
    self.env.obsLen = len(self.env.getSensors())
    # normalization for the task-specific sensors
    for i in range(self.env.obsLen - 2 * self.env.actLen):
        self.sensor_limits.append((-4, 4))
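
# --- Arithmetic sketch (assumes each SpecificBodyPositionSensor contributes a
# 3-d position): the four added sensors extend the observation by 12 values,
# so the final loop above appends twelve (-4, 4) limits for them, on top of
# the actLen angle limits and actLen velocity limits.
actLen = 4                             # hypothetical joint count
obsLen = 2 * actLen + 4 * 3            # angles + velocities + 4 positions
assert obsLen - 2 * actLen == 12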
def __init__(self, env=None, maxsteps=1000):
    """ @param env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000)
    """
    if env is None:
        env = CartPoleEnvironment()
    EpisodicTask.__init__(self, env)
    self.N = maxsteps
    self.t = 0
    # scale position and angle, don't scale velocities (unknown maximum)
    self.sensor_limits = [(-3, 3)]
    for i in range(1, self.outdim):
        if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0:
            self.sensor_limits.append(None)
        else:
            self.sensor_limits.append((-pi, pi))
    # note: this overrides the limits built above and disables sensor scaling
    self.sensor_limits = [None] * 4
    # actor between -10 and 10 Newton
    self.actor_limits = [(-10, 10)]
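
# --- Hedged usage sketch (assumes the standard PyBrain wiring; module paths
# may differ between versions): running a balancing task like the one above
# inside an episodic experiment.
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.experiments import EpisodicExperiment

env = CartPoleEnvironment()
task = BalanceTask(env, maxsteps=1000)
# an agent exposing integrateObservation/getAction/giveReward would go here:
# experiment = EpisodicExperiment(task, agent)
# experiment.doEpisodes(10)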
def reset(self):
    EpisodicTask.reset(self)
    self.t = 0

def performAction(self, action):
    self.t += 1
    EpisodicTask.performAction(self, action)

def __init__(self, environment):
    EpisodicTask.__init__(self, environment)
    self.N = 15
    self.t = 0
    self.state = [0.0] * environment.dim
    self.action = [0.0] * environment.dim

def performAction(self, action):
    EpisodicTask.performAction(self, action)
    self.action = action

def getObservation(self):
    self.state = EpisodicTask.getObservation(self)
    return self.state

def reset(self):
    self.steps = 0
    EpisodicTask.reset(self)

def __init__(self):
    self.environment = SimpleraceEnvironment()
    EpisodicTask.__init__(self, self.environment)
    self.t = 0
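
# --- Hedged sketch of the call cycle an experiment drives on these tasks;
# the method names come from EpisodicTask, while the loop itself normally
# lives in pybrain.rl.experiments.EpisodicExperiment (agent_policy is a
# hypothetical stand-in for the agent):
# while not task.isFinished():
#     obs = task.getObservation()
#     action = agent_policy(obs)
#     task.performAction(action)
#     reward = task.getReward()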