Example #1
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None, action_repeat=1):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = action_repeat

        self.state_size = self.action_repeat * 6
        self.action_low = 1
        self.action_high = 900
        self.action_size = self.action_repeat * 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 10.
        if self.sim.pose[2] < (self.target_pos[2] - 3.): 
            reward += 2*(self.sim.pose[2] - self.sim.init_pose[2])
            reward -= (abs(self.sim.pose[:2] - self.target_pos[:2]) > 3.).sum()
        else: 
            reward += 10.
            reward -= 2*(abs(self.sim.pose[:3] - self.target_pos) > 3.).sum()
            reward -= np.int32(abs(self.sim.pose[2] - self.target_pos[2]) > 3.)
        
        if self.sim.done:
            if self.sim.time < self.sim.runtime: 
                reward -= 10
                
        reward = reward / 10
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for i in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds[i*4:(i+1)*4]) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, (reward / self.action_repeat), done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
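
All of the variants below expose the same small interface (reset(), step(), state_size, action_low/action_high, action_size). A minimal episode loop for Example #1, assuming the project's PhysicsSim is importable and using uniformly random rotor speeds as a stand-in agent:

import numpy as np

task = Task(target_pos=np.array([0., 0., 10.]))
state = task.reset()
total_reward, done = 0., False
while not done:
    # stand-in policy: one random speed per action dimension, inside the allowed range
    rotor_speeds = np.random.uniform(task.action_low, task.action_high, task.action_size)
    state, reward, done = task.step(rotor_speeds)
    total_reward += reward
print(total_reward)
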
Example #2
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6

        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1. - .003 * abs(self.sim.pose[:3] - self.target_pos).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #3
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
            ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 
        

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Penalty for deviating from the target position in the x, y, z dimensions.
        dist_penalty = abs((self.sim.pose[:3] - self.target_pos).sum())
        # Penalty for deviating from a level orientation, in terms of the Euler angles.
        angular_penalty = abs((self.sim.pose[3:]).sum())
        # Overall reward = 10 for staying up and not crashing - (0.03 * dist_penalty + 0.06 * angular_penalty)
        reward_points = 10.0 - (0.03 * dist_penalty + 0.06 * angular_penalty)
        return reward_points

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
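
Note that dist_penalty above takes the absolute value of the summed per-axis error, so deviations of opposite sign cancel out. A quick check contrasting it with a per-axis absolute penalty (the second expression is a hypothetical alternative, not what the example uses):

import numpy as np

pose_xyz   = np.array([3., -3., 10.])
target_pos = np.array([0.,  0., 10.])
print(abs((pose_xyz - target_pos).sum()))   # 0.0 -- the x and y errors cancel out
print(abs(pose_xyz - target_pos).sum())     # 6.0 -- per-axis absolute deviation
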
Example #4
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3 # Paper uses 5 as the action repeat

        self.state_size = self.action_repeat * 6
        # Based on the random agent the hover/neutral rotor speed appears to be around 450, so tighten up the actions around that value
        self.action_low = 400 
        self.action_high = 500
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Shaped by the Euclidean distance to the target: close to +1 at the target, approaching -1 far away
        reward = 1 - 2*np.tanh(0.3*np.linalg.norm(self.sim.pose[:3] - self.target_pos[:3]))
        if self.sim.time < 1:
            reward += self.sim.time + self.sim.v[2]  # early-episode bonus for elapsed time and upward velocity
        if self.sim.done and self.sim.time < self.sim.runtime:
            reward -= 10  # Punish the reward function for crashing
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0.
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
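
The distance term in Example #4's reward, 1 - 2*tanh(0.3*d), starts at +1 on the target and saturates towards -1 far away; a few sample values (a quick check, not part of the original code):

import numpy as np

for d in [0., 1., 3., 10.]:
    print(d, 1 - 2 * np.tanh(0.3 * d))
# 0.0 -> 1.0, 1.0 -> ~0.42, 3.0 -> ~-0.43, 10.0 -> ~-0.99
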
Example #5
class Takeoff():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_height=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_height: target height (z coordinate) the agent should reach
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal: Vertical movement to target height only, keeping x and y coordinates the same
        self.target_pos = np.array([self.sim.init_pose[0], self.sim.init_pose[1],
                                    target_height if target_height is not None else 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        d_euclid_xy =  np.sqrt(((self.sim.pose[:2] - self.target_pos[:2])**2).sum())
        d_z = abs(self.sim.pose[2]-self.target_pos[2])
        
        reward = np.tanh(1 - 0.002*d_z - 0.001*d_euclid_xy) 
        
        return reward 

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
Example #6
class Takeoff():
    def __init__(self,
                 init_pose=np.array([0., 0., 0., .1, .1, .1]),
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_height=10.):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_height: target height (z coordinate) the agent should reach
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_x = self.sim.pose[0]
        self.target_y = self.sim.pose[1]
        self.target_z = target_height

    def get_reward(self):
        reward = 1. - .3 * abs(self.sim.pose[2] - self.target_z)
        return reward

    def step(self, rotor_speeds):
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #7
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6  # 6 states x 3 repeat = 18 
        self.action_low = 350 # 0
        self.action_high = 700 # 900
        self.action_size = 1 # 4

        # Goal : taking-off, hovering, going to a place or landing.
        self.target_pos = target_pos 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = (1.0
                  - 0.06 * abs(self.target_pos[2] - self.sim.pose[2])             # height error
                  - 0.04 * abs(self.sim.pose[0:1] - self.target_pos[0:1]).sum()   # x-position error
                  - 0.02 * abs(self.sim.pose[3:6]).sum())                         # Euler-angle magnitude
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        rotor_speeds = [rotor_speeds[0], rotor_speeds[0], rotor_speeds[0], rotor_speeds[0]]  # single learned speed applied to all four rotors
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
Example #8
class TakeoffTask():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_height, target_height, runtime=5):
        """Initialize a Task object.
        Params
        ======
            init_height: initial height of the quadcopter in z dimension to start from
            target_height: target height of the quadcopter in z dimension to reach for successful takeoff
            runtime: time limit for each episode
        """
        # Simulation
        self.init_height = init_height
        self.target_height = target_height
        self.runtime = runtime
        self.sim = PhysicsSim(init_pose=np.array(
            [0., 0., init_height, 0., 0., 0.]),
                              init_velocities=np.array([0., 0., 0.]),
                              init_angle_velocities=np.array([0., 0., 0.]),
                              runtime=runtime)

        self.state_size = len(self.create_state())
        self.action_low = 0
        self.action_high = 900
        self.action_size = 1

    def get_reward(self, actual_height):
        # reward is the negative absolute difference between the current and the target height
        return -abs(actual_height - self.target_height)

    def step(self, rotor_speed):
        """Uses action to obtain next state, reward, done."""
        done = self.sim.next_timestep(rotor_speed * 4)  # replicate the single speed across all four rotors
        done = done or self.sim.pose[2] >= self.target_height
        return self.create_state(), self.get_reward(self.sim.pose[2]), done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = self.create_state()
        return state

    def create_state(self):
        return np.array(
            [self.sim.pose[2], self.sim.v[2], self.sim.linear_accel[2]])
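
The rotor_speed * 4 in TakeoffTask.step assumes the single action arrives as a one-element Python list, so * 4 replicates the value across the four rotors; with a numpy array it would scale the speed instead. A quick illustration:

import numpy as np

print([450.] * 4)            # [450.0, 450.0, 450.0, 450.0] -- replicated, as intended
print(np.array([450.]) * 4)  # [1800.] -- scaled, which is not what the simulator expects
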
Example #9
class Task():
    def __init__(self):
        self.sim = PhysicsSim(init_pose=TARGET_POSE, runtime=RUNTIME)
        self.state_size = self.state.shape[0]
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

    def get_reward(self):
        error = np.concatenate([
            self.get_position_error(),
            self.get_orientation_error(),
        ])
        reward = -np.sqrt(np.mean(error**2))
        return reward

    def get_position_error(self):
        error = TARGET_POSE[:3] - self.sim.pose[:3]
        error = np.abs(error)
        return error

    def get_orientation_error(self):
        error = TARGET_POSE[3:6] - self.sim.pose[3:6]
        error = np.remainder(error, 2.0 * np.pi)
        error[error > np.pi] -= 2.0 * np.pi
        error = np.abs(error)
        return error

    @property
    def state(self):
        return np.concatenate([
            self.sim.pose, self.sim.v, self.sim.angular_v,
            self.sim.linear_accel, self.sim.angular_accels
        ])

    def step(self, rotorSpeeds):
        done = self.sim.next_timestep(rotorSpeeds)
        reward = self.get_reward()
        nextState = self.state
        return nextState, reward, done

    def reset(self):
        self.sim.reset()
        return self.state
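
Example #9 relies on module-level constants that are not shown in the snippet. Plausible definitions (hypothetical values, following the 6-element pose convention used by the other examples):

import numpy as np

TARGET_POSE = np.array([0., 0., 10., 0., 0., 0.])  # x, y, z, phi, theta, psi (assumed target)
RUNTIME = 5.                                        # episode time limit in seconds (assumed)
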
Example #10
class Task():
    """Main class that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose, init_velocities, init_angle_velocities,
                 runtime, target_pos):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        # Environment
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Target position
        self.target_pos = target_pos

    def _step(self, rotor_speeds, reward_func):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            # update the sim pose and velocities
            done = self.sim.next_timestep(rotor_speeds)
            reward += reward_func()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
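
Example #10 factors the reward out of _step and takes it as a callable, so concrete tasks can plug in their own shaping. A sketch of how a subclass might use it, assuming the Task class above and numpy as np (HoverTask and get_hover_reward are hypothetical names, not part of the original):

class HoverTask(Task):
    def get_hover_reward(self):
        # simple distance-based shaping, assumed only for illustration
        return 1. - .3 * abs(self.sim.pose[:3] - self.target_pos).sum()

    def step(self, rotor_speeds):
        return self._step(rotor_speeds, self.get_hover_reward)
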
Example #11
class Task():
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):

        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        reward = 1. - .1 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def step(self, rotor_speeds):
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #12
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None,
                 initial_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 12
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 100.])
        self.init_pose = init_pose[:3] if init_pose is not None else np.array(
            [0., 0., 10.])
        self.distance_to_cover = self.distanceBetweenPoints(
            self.init_pose, self.target_pos)

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # reward = 1.-.003*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        # if self.distanceBetweenPoints(self.target_pos, self.sim.pose[:3]) < 0.1*self.distance_to_cover:
        #     reward += 50
        d = self.distanceBetweenPoints(self.target_pos, self.sim.pose[:3])
        reward = self.distance_to_cover / d
        if (d < 0.1):
            reward += 1.6 * reward  # + reward/np.abs(self.sim.v).sum()
        elif (d < 0.2):
            reward += 0.8 * reward  # + reward/np.abs(self.sim.v).sum()
        elif (d < 0.3):
            reward += 0.4 * reward  # + reward/np.abs(self.sim.v).sum()
        else:
            reward += 0.2 * reward  # + np.abs(self.sim.v).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
            pose_all.append(self.sim.v)
            pose_all.append(self.sim.angular_accels)
            # if done:
            #     reward += 1
        next_state = np.concatenate(pose_all)
        info = {}
        if done:
            info = {
                "target_pos": self.target_pos,
                "final_pose": self.sim.pose[:3],
                "init_pose": self.init_pose
            }
        return next_state, reward, done, info

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate(
            [self.sim.pose, self.sim.v, self.sim.angular_accels] *
            self.action_repeat)
        return state

    def distanceBetweenPoints(self, p1, p2):
        return math.sqrt((p1[0] - p2[0])**2 + (p1[1] - p2[1])**2 +
                         (p1[2] - p2[2])**2)
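
The reward in Example #12 is the ratio of the initial distance to the current distance, so it is unbounded near the target (and would divide by zero exactly on it). A quick look at the base term using the default poses above, where the initial distance to cover is 90:

distance_to_cover = 90.              # |[0, 0, 10] - [0, 0, 100]|
for d in [90., 45., 9., 0.9, 0.09]:
    print(d, distance_to_cover / d)  # 1.0, 2.0, 10.0, 100.0, 1000.0
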
Example #13
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        self.dimension_weights = np.array(
            [1., 1., 0.]
        )  # weights for each velocity axis (the weight of z is set to zero so upward velocity is not penalized)
        vel_vector = self.sim.v
        distance = np.multiply(
            vel_vector, self.dimension_weights) / np.linalg.norm(
                self.dimension_weights)  # scale velocities according to the weights

        # Reward corresponding to a slightly modified derivative of tanh():
        # it returns 1 at zero and decays to -1 towards both infinities.
        # Together with the velocity weights, this aims to reduce movement on the x-y plane.
        reward = 2 * (1 - np.tanh(np.linalg.norm(2 * distance))**2) - 1
        reward = reward / 3
        reward += (np.clip(self.sim.v[2], -5, 5) / 5) * 2 / 3  # bonus for (clipped) upward velocity

        # Penalize if the quadcopter crashes
        if self.sim.done and self.sim.runtime > self.sim.time:
            reward = -10

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
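
A quick evaluation of Example #13's shaping for a purely vertical climb (vx = vy = 0, vz = 5): the planar term is at its maximum and the clipped vertical-velocity bonus at its cap, so the per-step reward comes out at 1.0 (a check, not part of the original code):

import numpy as np

v = np.array([0., 0., 5.])
weights = np.array([1., 1., 0.])
distance = np.multiply(v, weights) / np.linalg.norm(weights)
reward = (2 * (1 - np.tanh(np.linalg.norm(2 * distance)) ** 2) - 1) / 3
reward += (np.clip(v[2], -5, 5) / 5) * 2 / 3
print(reward)   # 1.0
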
Example #14
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.init_pose = init_pose
        self.init_velocities = init_velocities
        self.init_angle_velocities = init_angle_velocities

        self.state_size = self.action_repeat * 7
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        self.init_pose = np.array(init_pose if init_pose is not None else np.
                                  array([25., 25., 120., 0, 0, 0]))

        # Goal
        self.target_pos = np.array(
            target_pos if target_pos is not None else np.array([0., 0., 10.]))

        self.dist = abs(self.init_pose[:3] - self.target_pos).sum()
        self.last_dist = self.dist

        self.init_dist = abs(self.init_pose[:3] - self.target_pos).sum()
        self.init_vdist = abs(self.sim.pose[2] - self.target_pos[2])
        self.init_hdist = abs(self.sim.pose[:2] - self.target_pos[:2]).sum()
        self.last_vdist = self.init_vdist
        self.last_hdist = self.init_hdist
        self.last_pos = np.array(self.init_pose[:3])
        self.speed = 0
        self.proximity = 0

        self.speed_limit = 0.1

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        self.dist = abs(self.sim.pose[:3] - self.target_pos).sum()
        self.vdist = abs(self.sim.pose[2] - self.target_pos[2])
        self.hdist = abs(self.sim.pose[:2] - self.target_pos[:2]).sum()
        self.speed = abs(self.last_vdist - self.vdist)

        if not self.proximity:
            if self.vdist < 20:
                self.proximity = 1

        # GENTLE LANDING
        # return 1 - (self.vdist/self.init_vdist) * 1.5
        proximity_reward = 1 - (self.vdist / self.init_vdist)

        speed_penalty = (1 - max(self.speed, 0.05) / 1)**(
            1 - (self.vdist / self.init_vdist))
        if np.isnan(speed_penalty):
            speed_penalty = 0.01

        #if self.vdist > 20:
        #    speed_penalty = 0.5

        #axis_adjust = 1
        #for axis in self.sim.pose[3:]:
        #if axis > 2:
        #axis_adjust = 0.5 / axis

        self.last_dist = self.dist
        self.last_vdist = self.vdist
        self.last_hdist = self.hdist
        #print(proximity_reward * speed_penalty)

        return proximity_reward * speed_penalty / self.action_repeat
        ''' Working Snapshot 
        proximity_reward = 1 - (self.vdist/self.init_vdist)

        speed_penalty = (1 - max(self.speed, 0.05)/1) ** (1 - (self.vdist / self.init_vdist))
        if np.isnan(speed_penalty):
            speed_penalty = 0.01

        return proximity_reward * speed_penalty #* axis_adjust
'''

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        rotor_speeds = self.clip(rotor_speeds)  # keep the action within [action_low, action_high]

        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            state = list(self.sim.pose)
            state.append(self.speed)
            pose_all.append(state)
        next_state = np.concatenate(pose_all)
        """Uses action to obtain next state, reward, done."""

        #done = self.sim.next_timestep(self.rotor_speeds) # update the sim pose and velocities
        #reward = self.get_reward()
        #next_state = self.sim.pose

        #next_state.append(self.vdist)

        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.dist = abs(self.init_pose[:3] - self.target_pos).sum()
        self.last_dist = self.dist
        self.init_dist = self.dist
        self.init_vdist = abs(self.sim.pose[2] - self.target_pos[2])
        self.init_hdist = abs(self.sim.pose[:2] - self.target_pos[:2]).sum()
        self.last_vdist = self.init_vdist
        self.last_hdist = self.init_hdist
        self.last_pos = np.array(self.init_pose[:3])
        self.speed = 0
        self.state = list(self.sim.pose)
        self.state.append(self.speed)
        self.state = np.concatenate([self.state] * 3)

        return self.state

    def new_target(self, target_pose):
        self.target_pos = target_pose
        print('Destination updated.')

    def clip(self, action):
        return np.clip(np.array(action), self.action_low, self.action_high)
Example #15
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal - quadcopter should lower from start coordinates 0,0,10 to 0,0,8, and then hover in position.
        # the episode ends when the time expires or when the quadcopter goes out-of-bounds.
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 8.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # big reward if the target location (with some error margin) has been reached
        # bad reward if the drone gets very close to crashing
        #print("actual position vs target", self.sim.pose[:3], self.target_pos)
        if np.all(abs(self.sim.pose[:3] - self.target_pos) <= 0.3):
            reward = 100
        elif self.sim.pose[2] < 1:
            reward = -100
        else:
            reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        #stopping condition - if target position has been reached
        if np.all(abs(self.sim.pose[:3] - self.target_pos) <= 0.3):
            print("target reached, at position ", self.sim.pose[:3])
            #done = True
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #16
class LandingTask():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.sim_init_pose = self.sim.pose  # saving initial position of the quadcopter

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 0.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Taking into account the deviation between the current position and the target position on the xy axis
        reward = (self.target_pos[2] - self.sim.pose[2]) - 0.5 * (
            abs(self.target_pos - self.sim.pose[:3]))[:2].sum()

        if self.sim.pose[2] <= self.target_pos[2]:
            reward += 140.0  # Giving positive point for passing the targeted Z axis
        else:
            reward -= 10  # Punishing for each step if the agent is still above the ground

        # Giving agent positive reward if at each step if the current position is below the initial position
        reward += (self.sim_init_pose - self.sim.pose)[2]

        # Squash the per-step reward into the range [-1, 1]; summed over the action repeats this gives roughly [-3, 3]
        reward = np.tanh(reward)

        # My last reward function
        #         reward = np.tanh(1 - 0.003*(abs(self.sim.pose[:3] - self.target_pos))).sum()

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        # Also return the final x, y, z coordinates alongside the usual values
        return next_state, reward, self.sim.pose[0], self.sim.pose[1], self.sim.pose[2], done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
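
Unlike the other variants, LandingTask.step returns the final x, y, z coordinates alongside the usual triple, so a training loop has to unpack six values. A minimal sketch, assuming the class above and constant rotor speeds as a stand-in action:

task = LandingTask()
state = task.reset()
next_state, reward, x, y, z, done = task.step([400.] * 4)
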
Example #17
class Takeoff():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None,position_noise=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        print("########2 Task to Takeoff")
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.runtime = runtime
        self.position_noise = position_noise
        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        reward = 0.0

        # encourage acceleration in the +z direction
        #reward += self.sim.linear_accel[2]
        #reward +=  0.5 * self.sim.linear_accel[2] * self.sim.dt**2

        # encourage velocity in the +z direction
        if self.sim.v[2] > 0:
            reward = 10*self.sim.v[2]
        else:
            reward = -10

        # encourage smaller position errors, keeping the denominator from getting too close to zero
        #posDiff = -abs(self.sim.pose[2] - self.target_pos[2]) #**2
        #if posDiff > 0.1:
        #    reward += 10/posDiff
        #else:
        #    reward += 100

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)

            # sparse reward at the end if the target height is reached
            if self.sim.pose[2] >= self.target_pos[2]:
                print("\n successful takeoff to target !!!")
                reward += 10
                done = True

        #if np.isclose(self.target_pos[2], self.sim.pose[2], 1):
        #    reward += 100
        #    done = True



        #if(self.sim.pose[2] >= self.target_pos[2] and self.sim.time >= self.runtime):
        #if np.allclose(self.sim.pose[:-3],self.target_pos, rtol=1):
        #    reward +=100
        #    done = True

        next_state = np.concatenate(pose_all)
        return next_state, reward, done


    def reset(self):
        """Reset the sim with noise to start a new episode."""
        self.sim.reset()

        # Add random noise to the start position, as suggested by some papers, to make the agent generalize better
        if self.position_noise is not None and self.position_noise > 0:
            rand_pose = np.copy(self.sim.init_pose)
            rand_pose[:3] += np.random.normal(0.0, self.position_noise, 3)
            #print("start rand_pose =",rand_pose)
            self.sim.pose = np.copy(rand_pose)

        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #18
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose = np.array([0.0,0.0,10.0,0.0,0.0,0.0]),
                 init_velocities = np.array([0.0,0.0,0.1]),
                 init_angle_velocities = np.array([0.0,0.0,0.0]),
                 runtime=5.,
                 target_pos=np.array([0.0,0.0,50.0])):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3
        self.state_size    = self.action_repeat * 6
        
        self.action_low  = 10
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

        # to calc reward
        self.pos_diff_init = None

    def get_reward(self, done):
        """Uses current pose of sim to return reward."""
        reward = 0
        self.calc_pos_diff_ratio()
        reward = self.calc_base_reward_2(reward)

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward   = 0
        pose_all = []
        #
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            # done = self.check_episode_end(done)
            reward += self.get_reward(done)
            pose_all.append(self.sim.pose)
            #
            if done:
                missing = self.action_repeat - len(pose_all)
                pose_all.extend([pose_all[-1]] * missing)
                break
        #
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state

    #============================================================================#
    def calc_pos_diff_ratio(self):
        if self.pos_diff_init is None:
            self.pos_diff_init = self.sim.init_pose[:3] - self.target_pos
            self.pos_diff_init = (sum(self.pos_diff_init ** 2)) ** 0.5
        pos_diff = self.sim.pose[:3] - self.target_pos
        pos_diff = (sum(pos_diff ** 2)) ** 0.5
        # Normalized distance
        self.pos_diff_ratio = pos_diff / (self.pos_diff_init + 0.001)

    def calc_base_reward_2(self, reward):
        # reward += 1 - self.pos_diff_ratio * 1.0
        reward += 1 - self.pos_diff_ratio * (1.0 / 50.0)
        return reward

    def calc_episode_end_reward(self, reward, done):
        if done and self.check_near_goal():
            reward += 100
        elif done and self.sim.pose[2] <= 0.:
            reward -= 50
        elif done and self.check_out_of_range():
            reward -= 30
        elif done and not self.check_out_of_range() and self.sim.runtime > self.sim.time:
            reward -= 20
        return reward

    def check_episode_end(self, done):
        if self.check_near_goal():
            done = True
        return done

    def check_near_goal(self):
        return np.abs(self.target_pos[2] - self.sim.pose[2]) < 1.0

    def check_out_of_range(self):
        return self.pos_diff_ratio > 2.0
Example #19
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 1

        self.state_size = self.action_repeat * 2
        self.action_low = 390
        self.action_high = 420
        self.action_range = self.action_high - self.action_low
        self.action_size = 1

        # Goal - hover at z=10, x,y=0
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 0

        #reward = reward + 1.-0.3*(abs(self.sim.pose[2] - self.target_pos[2])).sum()
        reward = np.tanh(1 - 0.005 *
                         (abs(self.sim.pose[:3] - self.target_pos))).sum()

        # penalize too far from target position
        #distance = np.linalg.norm(self.target_pos[2] - self.sim.pose[2])
        #if (distance > 2):
        #    reward = reward - min(1,1/(distance + 1)**2)

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities

            reward += self.get_reward()

            # normalize the height error and vertical velocity for input to the network
            z_norm = (self.sim.pose[2] - 10) / 3
            z_v_norm = (self.sim.v[2]) / 3
            rotor_norm = (rotor_speeds[0] -
                          self.action_low) / self.action_range * 2 - 1  # normalized rotor speed (not included in the state)
            pose_all.append(z_norm)
            pose_all.append(z_v_norm)
            #print (z_norm, z_v_norm)
        next_state = np.array(pose_all)

        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode.
        Start each episode with the Quadcopyer a random distance (vertically) away from the target height
        """
        self.sim.reset()

        # perturb the start height by up to +/- perturb_unit
        perturb_unit = 0.1
        self.sim.pose[2] += (2 * np.random.random() - 1) * perturb_unit

        z_norm = (self.sim.pose[2] - 10) / 10
        z_v_norm = 0
        rotor_norm = 0

        state = np.array(([z_norm, z_v_norm]) * self.action_repeat)

        return state
Example #20
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)

        self.action_repeat = 3
        self.state_size = self.action_repeat * 6  # 6 states x 3 repeat = 18
        self.action_low = 500  # 0
        self.action_high = 850  # 900

        #reduce the action size, try 4
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        reward = 1.0

        reward += 5.0 - (abs(self.target_pos[2] - self.sim.pose[2]) /
                         self.target_pos[2])**0.4

        reward -= .02 * abs(self.sim.pose[:2] - self.target_pos[:2]).sum()

        reward -= .01 * abs(self.sim.pose[3:6]).sum()

        if self.target_pos[2] == self.sim.pose[2]:  # bonus only on exact (floating-point) equality with the target height
            reward += 100.0

        #reward = 1.0

        #reward += 15.0 * (1.0 - (abs(self.target_pos[2] - self.sim.pose[2]) / self.target_pos[2]))

        #reward += 0.1 * (1.0 - (abs(self.sim.pose[0:1] - self.target_pos[0:1]).sum() / 100))

        #reward += 0.1 * (1.0 - (abs(self.sim.pose[3:6]).sum() / 100))

        #- 0.04 * abs(self.sim.pose[0:1] - self.target_pos[0:1]).sum() - 0.02 * abs(self.sim.pose[3:6]).sum()

        #reward += (self.sim.pose[2])

        #reward = 1
        #reward += (self.sim.pose[2])

        # reward += 1 * (1 - np.linalg.norm(self.sim.pose[:2] - [0., 0.]))
        #reward += 1 * (1 - np.linalg.norm(self.sim.pose[3:] - [0., 0., 0.]))

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
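
The fractional 0.4 exponent flattens Example #20's height penalty; the height term alone, for the default target height of 10, at a few values of |dz| (a quick check, not part of the original code):

for dz in [0., 1., 10., 50.]:
    print(dz, 5.0 - (dz / 10.) ** 0.4)
# 0.0 -> 5.0, 1.0 -> ~4.60, 10.0 -> 4.0, 50.0 -> ~3.10
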
Example #21
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        reward = 0
        # penalty for distance from the target position, plus instability (non-zero Euler angles)
        penalty = abs(
            np.square(self.sim.pose[:3] - self.target_pos)).sum() + abs(
                self.sim.pose[3:6]).sum()

        #for proximity with target
        distance = np.sqrt((self.sim.pose[0] - self.target_pos[0])**2 +
                           (self.sim.pose[1] - self.target_pos[1])**2 +
                           (self.sim.pose[2] - self.target_pos[2])**2)
        #         if distance < 5:
        #             reward += 100

        #reward += 50
        done = False
        if (self.sim.pose[2] > self.target_pos[2]):
            reward += 50
            done = True
        reward += reward - penalty * 0.01
        return reward, done

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            delta_reward, done_right = self.get_reward()  # done_right (target height reached) is computed but not used to end the episode
            reward += delta_reward
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #22
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        # State
        self.state_size = self.action_repeat * (9)
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 150.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 0
        penalty = 0
        current_position = self.sim.pose[:3]
        # PENALTIES

        #penalty for distance from target on x axis
        penalty += abs(current_position[0] - self.target_pos[0])**2
        #penalty for distance from target on y axis
        penalty += abs(current_position[1] - self.target_pos[1])**2
        #penalty for distance from target on z axis, weighted as this is more important for this task of hovering at a certain height
        penalty += 12 * abs(current_position[2] - self.target_pos[2])**2
        #penalty for uneven takeoff
        penalty += abs(self.sim.pose[3:6]).sum()
        #penalty when the total speed does not match the remaining distance to the target
        penalty += 50 * abs(
            abs(current_position - self.target_pos).sum() -
            abs(self.sim.v).sum())
        #penalty for large variance in the angular velocities (discourages tumbling)
        penalty += 13 * np.var(self.sim.angular_v)

        # REWARDS

        #ongoing reward for being airborne
        if current_position[2] > 0.0:
            reward += 100
        #additional reward for flying near the target: each of the x, y and z
        #coordinates must individually be within 10 units of the target point
        if abs(current_position[0] - self.target_pos[0]) < 10 and \
                abs(current_position[1] - self.target_pos[1]) < 10 and \
                abs(current_position[2] - self.target_pos[2]) < 10:
            reward += 1000

        # TOTAL

        return reward - (penalty * 0.0002)

    def cur_state(self):
        state = np.concatenate([np.array(self.sim.pose), np.array(self.sim.v)])
        return state

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.cur_state())
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.cur_state()] * self.action_repeat)
        return state
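The per-axis proximity bonus in get_reward above can also be written as a small stand-alone helper; the following is a sketch (not part of the original submission) assuming NumPy and the same 10-unit tolerance and 1000-point bonus:

import numpy as np

def proximity_bonus(position, target, tol=10.0, bonus=1000.0):
    """Each of the x, y and z offsets must individually be within `tol` of the
    target, exactly as in the chained comparison used by get_reward above."""
    return bonus if np.all(np.abs(position - target) < tol) else 0.0

# proximity_bonus(np.array([1., -2., 148.]), np.array([0., 0., 150.]))  -> 1000.0
# proximity_bonus(np.array([1., -2., 120.]), np.array([0., 0., 150.]))  -> 0.0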
Beispiel #23
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=[0., 0., 50.],
                 init_angle_velocities=None,
                 runtime=5.,
                 target_height=100.):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_height: target/goal height (z coordinate) for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 1

        self.state_size = self.action_repeat * 9
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_height = target_height

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #reward = self.sim.v[2]**2
        #reward = 0.0
        reward = 10.0 / (1 + abs(self.sim.pose[2] - self.target_height))**2
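        # Added note: with the default target_height = 100 this reward is 10.0 at
        # z = 100, 10/4 = 2.5 at z = 99 or 101, and 10/121 (about 0.083) at z = 90
        # or 110, so it decays quadratically with the distance from the target height.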
        #reward += 100./(1+abs(np.linalg.norm(self.sim.pose[:2])-self.target_circle_radius))
        #reward -= 10.0*np.linalg.norm(self.sim.pose[6:9])**2
        #         reward += 1*np.linalg.norm(self.sim.pose[3:6])**2 if self.is_in_circle() else -0.5
        #         reward = 1 - np.tanh(abs(150 - self.sim.pose[2])/2 + abs(self.sim.v[2]/2))
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            #             if done:
            #                 pass
            #reward = -500
            reward += self.get_reward()
            pose_all.append(
                np.concatenate((self.sim.pose, self.sim.v), axis=None))
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate(
            [np.concatenate(
                (self.sim.pose, self.sim.v), axis=None)] * self.action_repeat)
        return state
Beispiel #24
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """

        # If the target position (point B) is not specified, assume a default position
        if target_pos is None:
            target_pos = np.array([0., 0., 100.])

        # Isolate the coordinates of the destination
        x_tar = round(np.around(target_pos[0], decimals=0))
        y_tar = round(np.around(target_pos[1], decimals=0))
        z_tar = round(np.around(target_pos[2], decimals=0))

        # If the initial position (point A) is not specified, assume a default position
        if init_pose is None:
            init_pose = np.array([0., 0., 0., 0., 0., 0.])

        # If the initial velocity is not specified, the drone points the velocity of each of the
        # X, Y & Z axes towards the destination coordinates
        if init_velocities is None:
            init_velocities = np.array([x_tar * 0.7, y_tar * 0.7, z_tar * 0.7])

        # If the initial angular velocities are not specified, assume a default of zero
        if init_angle_velocities is None:
            init_angle_velocities = np.array([0., 0., 0.])

        # Default values
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3
        self.action_size = 4
        self.action_low = 0
        self.action_high = 900
        self.state_size = self.action_repeat * 6
        self.target_pos = target_pos

    def get_distance(self, x_tar, y_tar, z_tar, x_pos, y_pos, z_pos):
        """ Calcula a Distância entre DOIS Pontos, usando Geometria Analítica & Cálculo Vetorial """

        # compute the hypotenuse over the X & Y axes
        cat_x = (x_tar - x_pos)**2
        cat_y = (y_tar - y_pos)**2
        hip_xy = (cat_x + cat_y)**(1 / 2)
        cat_xy = hip_xy**2

        # compute the hypotenuse between the XY plane & the Z axis
        cat_z = (z_tar - z_pos)**2
        hip_xyz = (cat_xy + cat_z)**(1 / 2)

        # return the distance given by the hypotenuse projected onto the "X, Y & Z" axes
        hip_xyz = round(hip_xyz)
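        # Added note: the chained Pythagorean steps above are equivalent to a single
        # Euclidean norm, e.g.
        #     round(np.linalg.norm(np.array([x_tar, y_tar, z_tar]) -
        #                          np.array([x_pos, y_pos, z_pos])))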
        return hip_xyz

    def get_reward(self):
        # Extract the current axis values
        x_pos = round(np.around(self.sim.pose[0], decimals=0))
        y_pos = round(np.around(self.sim.pose[1], decimals=0))
        z_pos = round(np.around(self.sim.pose[2], decimals=0))

        # Extract the target axis values
        x_tar = round(np.around(self.target_pos[0], decimals=0))
        y_tar = round(np.around(self.target_pos[1], decimals=0))
        z_tar = round(np.around(self.target_pos[2], decimals=0))

        # if the drone is on the ground and the ground is not the goal, penalize the agent
        if ((z_tar != 0) and (z_pos < 1)):
            reward = -100000000
        else:

            # reward based on the distance from point "A" to point "B"
            dist_pB = self.get_distance(x_tar, y_tar, z_tar, x_pos, y_pos,
                                        z_pos)

            # apply an adjustment if the drone is at the exact target point
            if dist_pB < 2:
                reward = 30000000

            else:
                reward = (((1 / dist_pB) * 10000)**2)

        # divide by 3 to account for "self.action_repeat", so that the rewards
        # follow the planned structure
        reward = reward / 3
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #25
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None, posn_tolerance=0.1, ang_pos_tolerance=0.1, 
                 target_vel=None, vel_tolerance=0.01, target_ang_vel=None, ang_vel_tolerance=0.01):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
            posn_tolerance: tolerance on the position in all of x, y and z
            ang_pos_tolerance: tolerance on the Euler angles
            target_vel: target/goal velocities in x, y, z for the agent
            vel_tolerance: tolerance on the velocity in all of x, y and z
            target_ang_vel: target angular velocities for the three Euler angles
            ang_vel_tolerance: tolerance on the angular velocity in all 3 of the Euler angles
        """
        # Simulation
        self.sim_time = 0.0 # Not amending sim but would like time in the reward hence hard coding these values
        self.sim_dt = 1 / 50.0
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])
        self.target_vel = target_vel if target_vel is not None else np.array([0., 0., 0.])
        self.target_ang_vel = target_ang_vel if target_ang_vel is not None else np.array([0., 0., 0.])

        # Tolerances (used by get_reward)
        self.pos_tolerance = posn_tolerance
        self.ang_pos_tolerance = ang_pos_tolerance
        self.vel_tolerance = vel_tolerance
        self.ang_vel_tolerance = ang_vel_tolerance

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Starter reward...
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        
        # Goal is to hover at start position
        # be close to start position
        distance_from_goal = np.linalg.norm(self.sim.pose[:3] - self.target_pos)
        
        # be not moving up/down/left/right/forward/backwards
        speed = abs(self.sim.v).sum()
        
        # be not spinning
        ang_speed = abs(self.sim.angular_v).sum()
        
        if distance_from_goal <= self.pos_tolerance: # reward for being within tolerance of the goal
            if speed <= self.vel_tolerance and ang_speed <= self.ang_vel_tolerance: # reward for being broadly stationary
                # quad is near goal and broadly stationary
                reward = 10.0
            else:
                # reduce reward proportional to speed/ang_speed
                reward = 10.0 - 0.2 * speed - 0.2 * ang_speed
        else: # the quad is not near enough to the goal therefore it should be moving
            # reward based solely on distance at this point
            reward = -1.0 * distance_from_goal
            
        
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0.0
        pose_all = []
        for _ in range(self.action_repeat):
            # update the time like in the sim...
            self.sim_time += self.sim_dt
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
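A stand-alone restatement of the piecewise hover reward above can be useful for sanity-checking the tolerances; this is a sketch (not part of the original submission) using the constructor's default tolerance values:

def hover_reward_sketch(distance, speed, ang_speed,
                        pos_tol=0.1, vel_tol=0.01, ang_vel_tol=0.01):
    # Mirrors get_reward above: full reward when hovering within tolerance,
    # a speed-reduced reward near the goal, and a pure distance penalty otherwise.
    if distance <= pos_tol:
        if speed <= vel_tol and ang_speed <= ang_vel_tol:
            return 10.0
        return 10.0 - 0.2 * speed - 0.2 * ang_speed
    return -1.0 * distance

# hover_reward_sketch(0.05, 0.005, 0.005) -> 10.0
# hover_reward_sketch(0.05, 2.0, 1.0)     -> 9.4
# hover_reward_sketch(3.0, 0.0, 0.0)      -> -3.0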
Beispiel #26
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        print('')
        print('init_pose: ', init_pose)
        print('init_velocities: ', init_velocities)
        print('target_pos: ', target_pos)
        print('')
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 130  # originally 0
        self.action_high = 730  # originally 900
        self.action_size = 1  # a single speed applied to all 4 rotors
        self.alpha = 1e-4  # learning rate for actor / policy update
        self.beta = 1e-3  # learning rate for critic / value update

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self, done):
        # reward function for the takeoff-task
        #rewards depending on position
        if (self.sim.pose[2] < self.target_pos[2] + 1) and (self.sim.v[2] < 0):
            #print('test')
            reward = -.5
        elif (self.sim.pose[2] > self.target_pos[2] - 1) and (self.sim.v[2] >
                                                              0):
            reward = -.5
        else:
            reward = 1. - .125 * (abs(self.target_pos[2] - self.sim.pose[2]))
            reward = np.clip(reward, -1, 1)
        # penalize crash
        if done and self.sim.time < self.sim.runtime:
            reward += -1
        #print(reward)
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):

            # use * np.ones(4) only if working with 1 rotor speed for all 4 rotors
            done = self.sim.next_timestep(
                rotor_speeds *
                np.ones(4))  # update the sim pose and velocities

            reward = self.get_reward(done)
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #27
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 9
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 0.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        # Calculate distance from target
        dist_from_target = np.sqrt(
            np.square(self.sim.pose[:3] - self.target_pos).sum())
        #dist_from_target_squared = np.square(self.sim.pose[:3] - self.target_pos).sum()

        # Calculate distance from vertical axis
        #dist_from_axis = np.sqrt(np.square(self.sim.pose[:2] - self.target_pos[:2]).sum())

        # Calculate angular deviation
        angular_deviation = np.sqrt(np.square(self.sim.pose[3:]).sum())

        # Calculate velocity in xy plane
        #non_z_v = np.square(self.sim.v[:2]).sum()

        # Calculate overall velocity
        #vel = np.square(self.sim.v[:3]).sum()

        penalty = 0.004 * dist_from_target * dist_from_target
        #penalty = 0.015 * dist_from_target_squared

        # Penalty based on angular deviation to encourage the quadcopter to remain upright
        penalty += 0.008 * angular_deviation

        # Penalty based on movement in the xy plane to encourage the quadcopter to fly vertically
        #if dist_from_axis > 4:
        #   penalty += 0.1

        # Penalty for high velocity to encourage quadcopter to fly slower
        #if vel > 10.0:
        #   penalty += 0.1

        #if self.sim.pose[2] > self.target_pos[2] + 5:
        #    penalty += 0.01

        bonus = 1.0
        #if dist_from_target < 0.5:
        #    bonus += 0.01

        # Calculate reward
        reward = bonus - penalty
        # Reward for time to encourage quadcopter to remain in the air
        #reward += 0.001 * self.sim.time

        # squash reward into (-1, 1); the commented-out line below is a hard-clamp alternative
        #return min(1.0, max(-1.0, reward))
        return np.tanh(reward)

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
            pose_all.append(self.sim.v)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        pose_all = np.append(self.sim.pose, self.sim.v)
        state = np.concatenate([pose_all] * self.action_repeat)
        return state
Beispiel #28
0
class TaskHover():
    def __init__(self,
                 positionStart=None,
                 positionStartNoise=0,
                 positionTarget=None,
                 nActionRepeats=1,
                 runtime=10.,
                 factorPositionZ=0.15,
                 factorPositionXY=0.0,
                 factorVelocityXY=0.3,
                 factorAngles=0.5,
                 factorAngleRates=0.0,
                 factorAngleAccels=0.0,
                 factorActions=0.05,
                 factorGlobal=1.0,
                 angleNoise=0,
                 angleRateNoise=0):

        # Internalize parameters.
        # Positions
        self.positionTarget = positionTarget if positionTarget is not None else np.array(
            [0., 0., 10.])
        self.positionStart = positionStart if positionStart is not None else np.array(
            [0., 0., 0.])
        # Reward factors.
        self.factorPositionZ = factorPositionZ
        self.factorPositionXY = factorPositionXY
        self.factorAngles = factorAngles
        self.factorAngleRates = factorAngleRates
        self.factorAngleAccels = factorAngleAccels
        self.factorActions = factorActions
        self.factorGlobal = factorGlobal
        self.positionStartNoise = positionStartNoise
        self.angleNoise = angleNoise
        self.angleRateNoise = angleRateNoise
        self.factorVelocityXY = factorVelocityXY
        # Number of action repeats.
        # For each agent time step we step the simulation multiple times and stack the states.
        self.nActionRepeats = nActionRepeats

        # Define action and state spaces.
        # Use OpenAI gym spaces to make this task behave
        # like a gym environment so we can use the same
        # agent as for the pendulum.
        # Init action space. Two actions, one for each side of the copter.
        # Limit actions to a range around hovering.
        self.action_low = 390
        self.action_high = 440
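        # `spaces` refers to OpenAI gym spaces as noted above (from gym import spaces is assumed)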
        self.action_space = spaces.Box(low=self.action_low,
                                       high=self.action_high,
                                       shape=(2, ))
        # Here the state is comprised of the target vector XZ, velocity XZ, cos theta, sin theta and the theta angular velocity (7 values).
        # Positions are bounded by the environment bounds, angles by +-pi and angular velocities by 40 rad/s (happens with two rotors 0 and two 900)
        # Because we step multiple times we receive a multiple of the state vector.
        #low = np.tile(np.hstack((self.sim.lower_bounds[[0,2]], self.sim.lower_bounds, np.array([0, 0, 0]), np.array([-np.pi*10.0, -np.pi*10.0, -np.pi*10.0]))), self.nActionRepeats)
        #high = np.tile(np.hstack((self.sim.upper_bounds, self.sim.upper_bounds, np.array([np.pi*2.0, np.pi*2.0, np.pi]), np.array([np.pi*10.0, np.pi*10.0, np.pi*10.0]))), self.nActionRepeats)
        low = np.tile(np.hstack(([-10, -10], [-20, -20], -1, -1, -40)),
                      self.nActionRepeats)
        high = np.tile(np.hstack(([10, 10], [20, 20], 1, 1, 40)),
                       self.nActionRepeats)
        self.observation_space = spaces.Box(low=low, high=high)

        # Init initial conditions.
        # Rotation is randomized by +-0.01 degrees around all axes.
        init_pose = np.hstack((self.positionStart, np.array([0., 0., 0.])))
        # Initial velocity 0,0,0.
        init_velocities = np.array([0., 0., 0.])
        # Initial angular velocity 0,0,0.
        init_angle_velocities = np.array([0., 0., 0.])

        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)

    def get_reward(self, rotor_speeds):
        return self.rewardVelocityXY(self.sim.v) + self.rewardAction(
            rotor_speeds) + self.rewardPositionXY(
                self.sim.pose) + self.rewardPositionZ(
                    self.sim.pose) + self.rewardAngles(
                        self.sim.pose,
                        self.sim.angular_v) + self.rewardAngleRates(
                            self.sim.pose[3:6],
                            self.sim.angular_v) + self.rewardAngleAccels(
                                self.sim.angular_v, self.sim.angular_accels)

    def rewardPositionZ(self, pose):
        #cGauss = 2
        cExp = -0.2
        #yGauss = np.exp(-((pose[2]-self.positionTarget[2])**2/(2*cGauss**2)))
        yExp = np.exp(cExp * np.abs(self.positionTarget[2] - pose[2]))
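        # Added note: yExp decays exponentially with the height error: 1.0 at the
        # target height, exp(-1) (about 0.37) at a 5 m error and exp(-2) (about 0.135)
        # at a 10 m error, before scaling by factorPositionZ (default 0.15) and factorGlobal.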
        return self.factorPositionZ * yExp * self.factorGlobal

    def rewardPositionXY(self, pose):
        a = .5
        return self.factorPositionXY * np.exp(
            -np.power(np.linalg.norm(self.positionTarget[:2] -
                                     pose[:2]), a)) * self.factorGlobal

    def rewardAngles(self, pose, angular_v):
        angles = pose[3:5]
        # convert angles from 0--2*pi to -pi--pi
        # get the max angle of roll and pitch (pose[3:5]).
        angles = np.max(
            np.abs([a if a <= np.pi else a - 2 * np.pi for a in pose[3:5]]))
        #x = np.max(  np.abs(  [a if a <= np.pi else a-2*np.pi for a in pose[3:5]]  )   )
        #return np.cos(x) * self.factorAngles

        # Also, only give reward for small angles if the angular velocity is small as well.
        # Thereby we avoid giving reward while the copter is tumbling.
        # We do this by scaling the angle reward with a Gaussian factor of the roll/pitch rates.
        cV = .8
        factorV = np.max(np.abs(angular_v[0:2]))
        factorV = np.exp(-((factorV**2) / (2 * cV**2)))

        cA = 0.5
        return np.exp(-(
            (angles**2) /
            (2 * cA**2))) * factorV * self.factorAngles * self.factorGlobal

    def rewardVelocityXY(self, v):
        c = 4
        vNorm = np.linalg.norm(v)
        return (np.exp(-(
            (vNorm**2) /
            (2 * c**2)))) * self.factorVelocityXY * self.factorGlobal

    def rewardAngleRates(self, angles, angular_v):
        '''
        cV=0.5
        factorV = np.max(np.abs(angular_v[0:2]))
        return (np.exp(-((x**2)/(2*c**2)))*2-1) * self.factorAngleRates
        '''
        #return np.tanh(np.sum(-np.sign([a if a <= np.pi else a-2*np.pi for a in angles[0:2]]) * angular_v[0:2])/2.) * self.factorAngleRates
        rewards = -np.tanh(
            [a if a <= np.pi else a - 2 * np.pi
             for a in angles[0:2]]) * np.tanh(angular_v[0:2])
        rewardMax = rewards[np.argmax(np.abs(rewards))]
        return rewardMax * self.factorAngleRates * self.factorGlobal

    def rewardAngleAccels(self, angular_v, angular_accels):
        # Only reward acceleration if it reduces absolute angle rate.
        # If angle rate is positive, a negative acceleration shall be rewarded and vice versa.
        return np.tanh(
            np.sum(-np.sign(angular_v[0:2]) * angular_accels[0:2]) /
            2.) * self.factorAngleAccels * self.factorGlobal

    def rewardAction(self, rotor_speeds):
        return -(np.mean(rotor_speeds) - self.action_low) / (
            self.action_high - self.action_low) * self.factorActions

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.nActionRepeats):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(rotor_speeds) / self.nActionRepeats
            vectorTarget = self.positionTarget[[0, 2]] - self.sim.pose[[0, 2]]
            distanceTarget = np.max((np.linalg.norm(vectorTarget), 1e-10))
            pose_all.append((vectorTarget / distanceTarget * np.min(
                (10.0, distanceTarget))))
            pose_all.append(
                self.sim.v[[0, 2]]
            )  # / np.max((np.linalg.norm(self.sim.v[[0,2]]),1e-10)) * np.min((20.0, np.linalg.norm(self.sim.v[[0,2]]))))
            #pose_all.append([self.sim.pose[4] if self.sim.pose[4] <= np.pi else self.sim.pose[4]-2*np.pi])
            pose_all.append([np.cos(self.sim.pose[4])])
            pose_all.append([np.sin(self.sim.pose[4])])
            pose_all.append(
                [np.max((np.min((self.sim.angular_v[1], np.pi)), -np.pi))])
        next_state = np.concatenate(pose_all)
        return next_state, reward, done, None

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset(positionNoise=self.positionStartNoise,
                       angleNoise=self.angleNoise,
                       angleRateNoise=self.angleRateNoise)
        vectorTarget = self.positionTarget[[0, 2]] - self.sim.pose[[0, 2]]
        distanceTarget = np.max((np.linalg.norm(vectorTarget), 1e-10))
        state = np.concatenate([
            (vectorTarget / distanceTarget * np.min((10.0, distanceTarget))),
            self.sim.v[[
                0, 2
            ]],  # / np.max((np.linalg.norm(self.sim.v[[0,2]]),1e-10)) * np.min((20.0, np.linalg.norm(self.sim.v[[0,2]]))),
            #[self.sim.pose[4] if self.sim.pose[4] <= np.pi else self.sim.pose[4]-2*np.pi],
            [np.cos(self.sim.pose[4])],
            [np.sin(self.sim.pose[4])],
            [self.sim.angular_v[1]]
        ] * self.nActionRepeats)
        return state
Beispiel #29
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 1  # all four propellers use the same thrust

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self, lv, av):
        """Uses current pose, v and angular_v of sim to return reward."""
        reward = 0
        # distance of the current pose from the target contributes to the reward
        reward += (10. - .2 * (abs(self.sim.pose[:3] - self.target_pos)).sum())
        # change in linear velocity since the previous timestep contributes to the reward
        reward += 5 - .3 * (abs(self.sim.v - lv)).sum()
        # change in angular velocity (currently disabled)
        #reward += -1*(abs(self.sim.angular_v - av)).sum()

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        prev_v = []
        prev_ang_v = []
        for _ in range(self.action_repeat):
            prev_v = self.sim.v
            prev_ang_v = self.sim.angular_v
            # update the sim pose and velocities
            done = self.sim.next_timestep(rotor_speeds)
            reward += self.get_reward(prev_v, prev_ang_v)
            pose_all.append(self.sim.pose[:-3])
            pose_all.append(
                self.sim.angular_v
            )  #pose_all.append(self.sim.v)  # Added velocities to the states
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate(
            [self.sim.pose[:-3], self.sim.angular_v] *
            self.action_repeat)  # Added velocities to the states
        return state
Beispiel #30
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 sel_state_variables=['pose', 'v', 'angular_v'],
                 target_state_variables=None,
                 repeat=2):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions
                and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            sel_state_variables: names of the sim state variables to include in the state
            target_state_variables: target/goal values for the selected state variables
            repeat: number of times each action is repeated per agent step
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime,
                              sel_state_variables)
        self.action_repeat = repeat

        # Goal, current state is target state if not specified
        if target_state_variables is None:
            self.target_pose = self.sim.pose
            self.target_velocity = self.sim.v
            self.target_angular_velocity = self.sim.angular_v
            target_state_variables = self.sim.get_state_variables()
        self.target_state_variables = target_state_variables

        self.state_size = self.action_repeat * len(target_state_variables)
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

    def get_state_variables(self):
        return self.sim.get_state_variables()

    def get_state_labels(self):
        return self.sim.get_state_labels()

    def get_time(self):
        return self.sim.get_time()

    def get_reward(self):
        # quadratic reward function
        dif = self.sim.get_state_variables() - self.target_state_variables
        reward = 5 - dif.dot(dif) / 25.0
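        # Added note: the reward is 5 when the selected state matches the target
        # exactly, 4 when the Euclidean distance ||dif|| is 5 (dif.dot(dif) = 25),
        # and it turns negative once ||dif|| exceeds sqrt(125), roughly 11.2.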

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)
            # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.get_state_variables())
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.array(self.get_state_variables().tolist() *
                         self.action_repeat)
        return state
Beispiel #31
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        #self.state_size = self.action_repeat * 9
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 1.])
        self.init_pose = init_pose if init_pose is not None else np.array(
            [0., 0., 20.])
        self.last_pose = self.init_pose

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        xy_penalty = 0.01 * abs(self.sim.v[0])
        xy_penalty += 0.01 * abs(self.sim.v[1])
        downward_penalty = 0.3 * abs(
            self.sim.pose[2] -
            self.init_pose[2]) if self.init_pose[2] > self.sim.pose[2] else 0

        z_displacement = self.sim.pose[2] - self.last_pose[2]  # to approach a value of 0.3 per 0.02s, 5 m per second
        z_displacement *= 10  #scale up
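        # `norm` below is assumed to be scipy.stats.norm (i.e. from scipy.stats import norm)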
        upward_reward = 0.5 * norm(
            loc=3, scale=1.).pdf(z_displacement) if z_displacement > 0. else 0
        #max 0.195

        upward_reward += min(1., 0.3 *
                             abs(self.sim.pose[2] - self.init_pose[2])
                             ) if self.init_pose[2] < self.sim.pose[2] else 0
        #upward_reward += 0.3 * abs(self.sim.pose[2] - self.init_pose[2]) if self.init_pose[2] < self.sim.pose[2] else 0
        #max 1

        reward = -1 * (xy_penalty + downward_penalty) + upward_reward
        reward = np.tanh(reward)

        #         if self.sim.pose[2] > self.target_pos[2]:
        #             reward += 10

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []

        self.last_pose = np.copy(self.sim.pose)

        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
            #pose_all.append(self.sim.v)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        #state = np.concatenate((self.sim.pose,self.sim.v))
        #state = np.concatenate([state] * self.action_repeat)
        self.last_pose = np.copy(self.init_pose)
        return state
Beispiel #32
0
class Landing():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.runtime = runtime

        # Goal
        self.target_velocity = np.array([0.0, 0.0,
                                         0.0])  # ideally zero velocity
        self.last_timestamp = 0
        self.last_position = np.array([0.0, 0.0, 0.0])
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def istargetzone(self):
        """Checks whether the copter has landed in target zone or not"""
        flag = False
        cntr = 0
        position = self.sim.pose[:3]

        #Set upper bound and lower bound for target zone
        target_bounds = 40
        lower_bounds = np.array([-target_bounds / 2, -target_bounds / 2, 0])
        upper_bounds = np.array(
            [target_bounds / 2, target_bounds / 2, target_bounds])

        #Set boundary conditions
        lower_pos = (self.target_pos + lower_bounds)
        upper_pos = (self.target_pos + upper_bounds)

        #Check whether the copter has landed within the boundaries of the target zone
        for j in range(3):

            #Check for the boundary conditions
            if (lower_pos[j] <= position[j] and position[j] < upper_pos[j]):
                cntr = cntr + 1

        #Check if all 3 conditions have been satisfied
        if cntr == 3:
            flag = True

        return flag

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        #Calculate distance between current position and target position
        distance = np.linalg.norm((self.sim.pose[:3] - self.target_pos))
        distance_max = np.linalg.norm(self.sim.upper_bounds)

        #Calculate velocity
        velocity = np.linalg.norm((self.sim.v - self.target_velocity))

        # Calculate distance factor and velocity factor
        distance_factor = 1 / max(distance, 0.1)
        vel_discount = (1.0 - max(velocity, 0.1)**(distance_factor))
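        # Added note: near the target the exponent distance_factor is large, so a
        # slow copter gets vel_discount close to 1 while a fast one is penalised
        # heavily (e.g. velocity 2 at distance 0.1 gives 1 - 2**10 = -1023); far
        # from the target the exponent is small and the discount stays near 0.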

        reward = 0

        # Penalize agent running out of time
        if self.sim.time >= self.runtime:
            reward = -10.0
            self.sim.done = True
        else:
            # Agent has touched the ground surface (i.e. z=0)
            if (self.sim.pose[2] == self.target_pos[2]):

                # If velocity is less than the specified threshold
                # it implies that the agent has landed successfully
                if (self.sim.v[2] <= 1):

                    if self.istargetzone():
                        #Landed safely. Give bonus points for landing in the target zone
                        reward = 100.0
                        print('Agent has landed in the target zone')

                    else:
                        reward = -100.0  #Landed outside target zone
                        print('outside')

                else:
                    #Penalize agent for crashing
                    reward = -100  # Crashed
                    self.sim.done = True

            else:
                if not np.isnan(self.sim.v[2]):
                    # apply a distance-based shaping reward, discounted by the velocity factor
                    distance_reward = 0.2 - (distance / distance_max)**0.1
                    reward = vel_discount * distance_reward
                else:
                    #Penalize agent for crashing
                    reward = -100  # Crashed
                    self.sim.done = True

        #Apply tanh to avoid instability in training due to exploding gradients
        reward = np.tanh(reward)

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0

        pose_all = []
        for _ in range(self.action_repeat):
            #print(rotor_speeds)
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)

        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.last_timestamp = 0
        self.last_position = np.array([0.0, 0.0, 0.0])
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state