Esempio n. 1
0
class DoublePoleEnvironment(Environment):
    """ two poles to be balanced from the same cart. """

    indim = 1
    ooutdim = 6

    def __init__(self):
        self.p1 = CartPoleEnvironment()
        self.p2 = CartPoleEnvironment()
        self.p2.l = 0.05
        self.p2.mp = 0.01
        self.reset()

    def getSensors(self):
        """ returns the state one step (dt) ahead in the future. stores the state in
            self.sensors because it is needed for the next calculation. The sensor return
            vector has 6 elements: theta1, theta1', theta2, theta2', s, s'
            (s being the distance from the origin).
        """
        s1 = self.p1.getSensors()
        s2 = self.p2.getSensors()
        self.sensors = (s1[0], s1[1], s2[0], s2[1], s2[2], s2[3])
        return self.sensors

    def reset(self):
        """ re-initializes the environment, setting the cart back in a random position.
        """
        self.p1.reset()
        self.p2.reset()
        # put cart in the same place:
        self.p2.sensors = (self.p2.sensors[0], self.p2.sensors[1],
                           self.p1.sensors[2], self.p1.sensors[3])
        self.getSensors()

    def performAction(self, action):
        """ stores the desired action for the next runge-kutta step.
        """
        self.p1.performAction(action)
        self.p2.performAction(action)

    def getCartPosition(self):
        """ auxiliary access to just the cart position, to be used by BalanceTask """
        return self.sensors[4]

    def getPoleAngles(self):
        """ auxiliary access to just the pole angle(s), to be used by BalanceTask """
        return [self.sensors[0], self.sensors[2]]
Esempio n. 2
0
class DoublePoleEnvironment(Environment):
    """ two poles to be balanced from the same cart. """

    indim = 1
    ooutdim = 6

    def __init__(self):
        self.p1 = CartPoleEnvironment()
        self.p2 = CartPoleEnvironment()
        self.p2.l = 0.05
        self.p2.mp = 0.01
        self.reset()
        
    def getSensors(self):
        """ returns the state one step (dt) ahead in the future. stores the state in
            self.sensors because it is needed for the next calculation. The sensor return 
            vector has 6 elements: theta1, theta1', theta2, theta2', s, s' 
            (s being the distance from the origin).
        """
        s1 = self.p1.getSensors()
        s2 = self.p2.getSensors()
        self.sensors = (s1[0], s1[1], s2[0], s2[1], s2[2], s2[3])
        return self.sensors
        
    def reset(self):
        """ re-initializes the environment, setting the cart back in a random position.
        """
        self.p1.reset()
        self.p2.reset()
        # put cart in the same place:
        self.p2.sensors = (self.p2.sensors[0], self.p2.sensors[1], self.p1.sensors[2], self.p1.sensors[3])
        self.getSensors()
                    
    def performAction(self, action):
        """ stores the desired action for the next runge-kutta step.
        """
        self.p1.performAction(action)
        self.p2.performAction(action)
        
    def getCartPosition(self):
        """ auxiliary access to just the cart position, to be used by BalanceTask """
        return self.sensors[4]
        
    def getPoleAngles(self):
        """ auxiliary access to just the pole angle(s), to be used by BalanceTask """
        return [self.sensors[0], self.sensors[2]]   
        
 def getSensors(self):
     """ returns the state one step (dt) ahead in the future. stores the state in
         self.sensors because it is needed for the next calculation. The sensor return 
         vector has 2 elements: theta, s 
         (s being the distance from the origin).
     """
     tmp = CartPoleEnvironment.getSensors(self)
     return (tmp[0], tmp[2])
Esempio n. 4
0
 def getSensors(self):
     """ returns the state one step (dt) ahead in the future. stores the state in
         self.sensors because it is needed for the next calculation. The sensor return 
         vector has 2 elements: theta, s 
         (s being the distance from the origin).
     """
     tmp = CartPoleEnvironment.getSensors(self)
     return (tmp[0], tmp[2])
    def __init__(self, env=None, maxsteps=1000):
        """
        :key env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        :key maxsteps: maximal number of steps (default: 1000) 
        """
        if env == None:
            env = CartPoleEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # no scaling of sensors
        self.sensor_limits = [None] * 2

        # scale actor
        self.actor_limits = [(-50, 50)]
Esempio n. 6
0
    def __init__(self, env=None, maxsteps=1000):
        """
        @param env: (optional) an instance of a CartPoleEnvironment (or a subclass thereof)
        @param maxsteps: maximal number of steps (default: 1000) 
        """
        if env == None:
            env = CartPoleEnvironment()
        EpisodicTask.__init__(self, env)
        self.N = maxsteps
        self.t = 0

        # scale position and angle, don't scale velocities (unknown maximum)
        self.sensor_limits = [(-3, 3)]  #, None, (-pi, pi), None]
        for i in range(1, self.outdim):
            if isinstance(self.env, NonMarkovPoleEnvironment) and i % 2 == 0:
                self.sensor_limits.append(None)
            else:
                self.sensor_limits.append((-pi, pi))

        self.sensor_limits = [None] * 4
        # actor between -10 and 10 Newton
        self.actor_limits = [(-10, 10)]
 def __init__(self):
     self.p1 = CartPoleEnvironment()
     self.p2 = CartPoleEnvironment()
     self.p2.l = 0.05
     self.p2.mp = 0.01
     self.reset()
Esempio n. 8
0
 def __init__(self):
     self.p1 = CartPoleEnvironment()
     self.p2 = CartPoleEnvironment()
     self.p2.l = 0.05
     self.p2.mp = 0.01
     self.reset()