def __init__(self, environment):
    """Attach *environment* and configure sensor/actor normalization.

    Sensor readings are scaled from their physical ranges — angle in
    (-pi, pi), velocity in (-20, 20) — into (-1, 1); the single actor
    dimension is already in (-1, 1).
    """
    EpisodicTask.__init__(self, environment)
    # Per-episode reward record and step counter.
    self.reward_history = []
    self.count = 0
    # normalize to (-1, 1)
    self.sensor_limits = [(-pi, pi), (-20, 20)]
    self.actor_limits = [(-1, 1)]
def __init__(self, env):
    """Initialize the task on *env*.

    Sets up step/reward bookkeeping, derives the observation and action
    dimensions from the environment, and precomputes a distance scale
    used to bound movement speed.
    """
    EpisodicTask.__init__(self, env)
    self.step = 0       # total interaction steps across all episodes
    self.epiStep = 0    # steps within the current episode
    self.reward = [0.0]
    self.rawReward = 0.0
    self.obsSensors = ["EdgesReal"]
    self.rewardSensor = [""]
    self.oldReward = 0.0
    # Labels used when plotting task progress.
    self.plotString = ["World Interactions", "Reward", "Reward on NoReward Task"]
    # NOTE(review): presumably getObservation() depends on the sensor
    # attributes assigned above, so this must stay after them — confirm.
    self.inDim = len(self.getObservation())
    self.outDim = self.env.actLen
    # Distance scale derived from the environment geometry.
    self.dif = (self.env.fraktMax - self.env.fraktMin) * self.env.dists[0]
    self.maxSpeed = self.dif / 30.0
    self.picCount = 0   # counter for saved pictures/frames
    self.epiLen = 1     # episode length
def __init__(self, env=None, maxsteps=1000):
    """Balancing task on a cart-pole environment.

    :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
    :key maxsteps: maximal number of steps (default: 1000)
    """
    # Use identity comparison for None (PEP 8); '==' can be hijacked
    # by a custom __eq__ on the environment.
    if env is None:
        env = CartPoleEnvironment()
    EpisodicTask.__init__(self, env)
    self.N = maxsteps   # episode length limit
    self.t = 0          # current timestep
    # no scaling of sensors
    self.sensor_limits = [None] * 2
    # scale actor
    self.actor_limits = [(-50, 50)]
def __init__(self, env=None, maxsteps=1000):
    """Ship-steering task.

    :key env: (optional) an instance of a ShipSteeringEnvironment (or a subclass thereof)
    :key maxsteps: maximal number of steps (default: 1000)
    """
    # Use identity comparison for None (PEP 8); '==' can be hijacked
    # by a custom __eq__ on the environment.
    if env is None:
        env = ShipSteeringEnvironment(render=False)
    EpisodicTask.__init__(self, env)
    self.N = maxsteps   # episode length limit
    self.t = 0          # current timestep
    # scale sensors
    # [h, hdot, v]
    self.sensor_limits = [(-180.0, +180.0), (-180.0, +180.0), (-10.0, +40.0)]
    # actions: thrust, rudder
    self.actor_limits = [(-1.0, +2.0), (-90.0, +90.0)]
    # scale reward over episode, such that max. return = 100
    self.rewardscale = 100. / maxsteps / self.sensor_limits[2][1]
def __init__(self, env):
    """Set up the Johnnie task: physics tweaks plus sensor/actor scaling.

    Joint-angle and joint-velocity sensors are normalized to (-1, 1),
    as are all actor dimensions.
    """
    EpisodicTask.__init__(self, env)
    # Overall maximal torque — multiplied with each joint's relative
    # max torque to obtain the individual joint limit.
    self.maxPower = 100.0
    self.reward_history = []
    self.count = 0      # timestep counter
    self.epiLen = 500   # suggested episode length for normal Johnnie tasks
    self.incLearn = 0   # counts the task resets for incremental learning
    # Johnnie needs higher friction and a finer time resolution.
    self.env.FricMu = 20.0
    self.env.dt = 0.01
    # Normalize standard sensors to (-1, 1): angle limits first,
    # then one (-20, 20) velocity range per joint.
    angle_limits = [(self.env.cLowList[i], self.env.cHighList[i])
                    for i in range(self.env.actLen)]
    velocity_limits = [(-20, 20)] * self.env.actLen
    self.sensor_limits = angle_limits + velocity_limits
    # Norm all actor dimensions to (-1, 1).
    self.actor_limits = [(-1, 1)] * env.actLen
def __init__(self, env): EpisodicTask.__init__(self, env) #Overall maximal tourque - is multiplied with relative max tourque for individual joint. self.maxPower = 100.0 self.reward_history = [] self.count = 0 #timestep counter self.epiLen = 1500 #suggestet episodic length for normal Johnnie tasks self.incLearn = 0 #counts the task resets for incrementall learning self.env.FricMu = 20.0 #We need higher friction for CCRL self.env.dt = 0.002 #We also need more timly resolution # normalize standard sensors to (-1, 1) self.sensor_limits = [] #Angle sensors for i in range(self.env.actLen): self.sensor_limits.append( (self.env.cLowList[i], self.env.cHighList[i])) # Joint velocity sensors for i in range(self.env.actLen): self.sensor_limits.append((-20, 20)) #Norm all actor dimensions to (-1, 1) self.actor_limits = [(-1, 1)] * env.actLen self.oldAction = zeros(env.actLen, float) self.dist = zeros(9, float) self.dif = array([0.0, 0.0, 0.0]) self.target = array([-6.5, 1.75, -10.5]) self.grepRew = 0.0 self.tableFlag = 0.0 self.env.addSensor(SpecificBodyPositionSensor(['objectP00'], "glasPos")) self.env.addSensor(SpecificBodyPositionSensor(['palmLeft'], "palmPos")) self.env.addSensor( SpecificBodyPositionSensor(['fingerLeft1'], "finger1Pos")) self.env.addSensor( SpecificBodyPositionSensor(['fingerLeft2'], "finger2Pos")) #we changed sensors so we need to update environments sensorLength variable self.env.obsLen = len(self.env.getSensors()) #normalization for the task spezific sensors for i in range(self.env.obsLen - 2 * self.env.actLen): self.sensor_limits.append((-4, 4))
def __init__(self, env=None, maxsteps=1000, desiredValue=0):
    """Balancing task with a configurable target value.

    :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
    :key maxsteps: maximal number of steps (default: 1000)
    :key desiredValue: target value the task rewards reaching (default: 0)
    """
    self.desiredValue = desiredValue
    # Use identity comparison for None (PEP 8).
    if env is None:
        env = CartPoleEnvironment()
    EpisodicTask.__init__(self, env)
    self.N = maxsteps   # episode length limit
    self.t = 0          # current timestep
    # scale position and angle, don't scale velocities (unknown maximum)
    self.sensor_limits = [(-3, 3)]
    for i in range(1, self.outdim):
        if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0:
            self.sensor_limits.append(None)
        else:
            self.sensor_limits.append((-pi, pi))
    # NOTE(review): the per-dimension limits built above are immediately
    # discarded by the assignment below (present in the original source).
    # Kept as-is to preserve behavior — confirm which of the two variants
    # was actually intended before removing either.
    self.sensor_limits = [None] * 4
    # actor between -50 and 50 Newton (comment previously said -10..10)
    self.actor_limits = [(-50, 50)]
def __init__(self):
    """Create the task together with its own SimpleraceEnvironment."""
    env = SimpleraceEnvironment()
    self.environment = env
    EpisodicTask.__init__(self, env)
    self.t = 0  # timestep counter
def __init__(self, environment):
    """Attach *environment* and zero-initialize state/action vectors.

    Both vectors have one entry per environment dimension; the episode
    runs for a fixed 15 steps.
    """
    EpisodicTask.__init__(self, environment)
    self.N = 15   # fixed episode length
    self.t = 0    # current timestep
    dim = environment.dim
    self.state = [0.0] * dim
    self.action = [0.0] * dim