Example #1
    def __init__(self):

        CopterEnv.__init__(self)

        # Initialize state
        self._init()

        # Action space = motors, rescaled from [0,1] to [-1,+1]
        self.action_space = spaces.Box(np.array([-1] * 4), np.array([1] * 4))

        # Observation space = full state space
        self.observation_space = spaces.Box(np.array([-np.inf] * 12),
                                            np.array([+np.inf] * 12))
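The constructors above only declare the spaces; below is a minimal sketch of the convention they encode, assuming only numpy and gym.spaces. Actions are sampled in [-1,+1] and rescaled to [0,1] motor values, exactly as the step() methods further down do. The stand-in variables here are for illustration only.

import numpy as np
from gym import spaces

# Stand-in spaces mirroring Example #1: four motor commands in [-1,+1],
# twelve unbounded state variables
action_space = spaces.Box(np.array([-1.0] * 4), np.array([1.0] * 4))
observation_space = spaces.Box(np.array([-np.inf] * 12), np.array([np.inf] * 12))

action = action_space.sample()      # values in [-1,+1]
motors = (1 + action) / 2           # rescaled to [0,1], as in the step() methods below
assert np.all((0.0 <= motors) & (motors <= 1.0))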
Example #2
    def __init__(self, dt=.001, target=10, tolerance=1.0):

        CopterEnv.__init__(self, dt)

        self.target = target
        self.tolerance = tolerance

        # Action space = motors, rescaled from [0,1] to [-1,+1]
        self.action_space = spaces.Box(np.array([-1]), np.array([1]))

        # Observation space = altitude, vertical_velocity
        self.observation_space = spaces.Box(np.array([0,-np.inf]), np.array([np.inf,np.inf]))

        self._init()
Example #3
    def __init__(self):

        CopterEnv.__init__(self, statedims=15)

        # Initialize state
        self._init()

        # Action space = motors, rescaled from [0,1] to [-1,+1]
        self.action_space = spaces.Box(np.array([-1]*4), np.array([1]*4))

        # Observation space = full state space plus target position
        self.observation_space = spaces.Box(np.array([-np.inf]*15), np.array([+np.inf]*15))

        self.target_theta = 0
Example #4
    def step(self, action):

        # Rescale action from [-1,+1] to [0,1]
        motor = (1 + action[0]) / 2

        # Use it for all four motors
        motors = motor * np.ones(4)

        # Call parent-class step() to do basic update
        CopterEnv._update(self, motors)

        # Dynamics uses NED coordinates, so negate to get altitude and vertical velocity
        altitude = -self.state[4]
        velocity = -self.state[5]

        # Get a reward for every timestep on target
        self.reward += int(abs(altitude - self.target) < self.tolerance)

        # Always return done=False; the episode limit in the registry determines when we're done
        return (altitude, velocity), self.reward, False, {}
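A minimal sketch of the reward rule above, assuming target=10 and tolerance=1.0 as in the constructor of Example #2: the accumulated reward grows by one for every timestep spent within tolerance of the target altitude.

target, tolerance = 10.0, 1.0

def step_reward(altitude):
    # 1 if the altitude is within tolerance of the target, else 0
    return int(abs(altitude - target) < tolerance)

print(step_reward(9.5))    # 1: on target
print(step_reward(12.0))   # 0: off target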
Example #5
    def step(self, action):

        # Rescale action from [-1,+1] to [0,1]
        motors = (1 + action) / 2

        # Call parent-class method to do basic state update, return whether vehicle crashed
        crashed = CopterEnv._update(self, motors)

        # Integrate position
        self.position += self.state[0:5:2]

        # Reward is the Euclidean distance of the horizontal (x,y) position from the origin
        reward = np.sqrt(np.sum(self.position[0:2]**2))

        return self.state, reward, crashed, {}
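The slice state[0:5:2] above picks indices 0, 2 and 4; judging from the step() methods in Examples #4 and #7, where state[4] is a position and state[5] a velocity, these appear to be the x, y and z position components. A quick illustration of the slicing only, with a dummy state vector:

import numpy as np

state = np.arange(12, dtype=float)   # dummy state; each value is just its own index
print(state[0:5:2])                  # [0. 2. 4.] -> elements 0, 2 and 4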
Example #6
    def step(self, action):

        # Rescale action from [-1,+1] to [0,1]
        motors = (1 + action) / 2

        # Call parent-class method to do basic state update, return whether vehicle crashed
        crashed = CopterEnv._update(self, motors)

        # Update target position.  For now we just make it go in circles
        self.state[12] = 10 * np.cos(self.target_theta)
        self.state[13] = 10 * np.sin(self.target_theta)
        self.target_theta += .0025

        # Fake up reward for now
        reward = 0

        return self.state, reward, crashed, {}
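A short sketch of the target trajectory generated above: a circle of radius 10 in the x-y plane, advanced by 0.0025 radians per step, so one full revolution takes roughly 2513 steps.

import numpy as np

# One revolution of the circular target at 0.0025 rad per step
print(round(2 * np.pi / 0.0025))            # ~2513 steps

# First few target positions on the radius-10 circle
thetas = np.arange(3) * .0025
xs, ys = 10 * np.cos(thetas), 10 * np.sin(thetas)
print(xs, ys)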
Example #7
    def step(self, action):

        # Rescale action from [-1,+1] to [0,1]
        motor = (1 + action[0]) / 2

        # Use it for all four motors
        motors = motor * np.ones(4)

        # Call parent-class step() to do basic update
        crashed = CopterEnv._update(self, motors)

        # Dynamics uses NED coordinates, so negate to get altitude and vertical velocity
        altitude = -self.state[4]
        velocity = -self.state[5]

        #costs = (altitude-self.target)**2 + .1*velocity**2 + .001*(motor**2)
        costs = (altitude - self.target)**2 + velocity**2

        # Episode ends on a crash; otherwise the episode limit in the registry determines when we're done
        return (altitude, velocity), -costs, crashed, {}
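A minimal sketch of the quadratic cost above, assuming target=10: the reward is the negated sum of squared altitude error and squared vertical velocity, so hovering exactly at the target scores 0 and everything else scores negative.

target = 10.0

def reward(altitude, velocity):
    costs = (altitude - target) ** 2 + velocity ** 2
    return -costs

print(reward(10.0, 0.0))   # 0.0: hovering at the target
print(reward(8.0, 1.0))    # -5.0: 2 m off target, moving vertically at 1 m/s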
Example #8
    def reset(self):
        CopterEnv.reset(self)
        self._init()
        # Return altitude and vertical velocity (negated NED down-position and down-velocity)
        return -self.state[4:6]
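reset() above returns -self.state[4:6]; with the NED convention used throughout (state[4] is the down-position, state[5] the down-velocity), negating gives altitude and climb rate. A tiny illustration with a dummy state vector:

import numpy as np

state = np.zeros(12)
state[4] = -15.0    # down-coordinate of -15 m, i.e. 15 m above the origin
state[5] = -2.0     # down-velocity of -2 m/s, i.e. climbing at 2 m/s

altitude, climb_rate = -state[4:6]
print(altitude, climb_rate)   # 15.0 2.0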