class DoublePoleEnvironment(Environment): """ two poles to be balanced from the same cart. """ indim = 1 ooutdim = 6 def __init__(self): self.p1 = CartPoleEnvironment() self.p2 = CartPoleEnvironment() self.p2.l = 0.05 self.p2.mp = 0.01 self.reset() def getSensors(self): """ returns the state one step (dt) ahead in the future. stores the state in self.sensors because it is needed for the next calculation. The sensor return vector has 6 elements: theta1, theta1', theta2, theta2', s, s' (s being the distance from the origin). """ s1 = self.p1.getSensors() s2 = self.p2.getSensors() self.sensors = (s1[0], s1[1], s2[0], s2[1], s2[2], s2[3]) return self.sensors def reset(self): """ re-initializes the environment, setting the cart back in a random position. """ self.p1.reset() self.p2.reset() # put cart in the same place: self.p2.sensors = (self.p2.sensors[0], self.p2.sensors[1], self.p1.sensors[2], self.p1.sensors[3]) self.getSensors() def performAction(self, action): """ stores the desired action for the next runge-kutta step. """ self.p1.performAction(action) self.p2.performAction(action) def getCartPosition(self): """ auxiliary access to just the cart position, to be used by BalanceTask """ return self.sensors[4] def getPoleAngles(self): """ auxiliary access to just the pole angle(s), to be used by BalanceTask """ return [self.sensors[0], self.sensors[2]]
def getSensors(self): """ returns the state one step (dt) ahead in the future. stores the state in self.sensors because it is needed for the next calculation. The sensor return vector has 2 elements: theta, s (s being the distance from the origin). """ tmp = CartPoleEnvironment.getSensors(self) return (tmp[0], tmp[2])
def __init__(self, env=None, maxsteps=1000): """ :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof) :key maxsteps: maximal number of steps (default: 1000) """ if env == None: env = CartPoleEnvironment() EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 # no scaling of sensors self.sensor_limits = [None] * 2 # scale actor self.actor_limits = [(-50, 50)]
def __init__(self, env=None, maxsteps=1000): """ @param env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof) @param maxsteps: maximal number of steps (default: 1000) """ if env == None: env = CartPoleEnvironment() EpisodicTask.__init__(self, env) self.N = maxsteps self.t = 0 # scale position and angle, don't scale velocities (unknown maximum) self.sensor_limits = [(-3, 3)] #, None, (-pi, pi), None] for i in range(1, self.outdim): if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0: self.sensor_limits.append(None) else: self.sensor_limits.append((-pi, pi)) self.sensor_limits = [None] * 4 # actor between -10 and 10 Newton self.actor_limits = [(-10, 10)]
def __init__(self): self.p1 = CartPoleEnvironment() self.p2 = CartPoleEnvironment() self.p2.l = 0.05 self.p2.mp = 0.01 self.reset()