Example #1
import numpy as np

from physics_sim import PhysicsSim  # simulator helper from the Udacity quadcopter project


class Task:
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(
        self,
        init_pose=None,
        init_velocities=None,
        init_angle_velocities=None,
        runtime=5.0,
        target_pos=None,
    ):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = (target_pos if target_pos is not None else np.array(
            [0.0, 0.0, 10.0]))

    @staticmethod
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # The farther we are from the target position, the more is subtracted, scaled by scale_factor.
        # Because the summed distance below is squared, the reward stays positive only while
        # that distance is below sqrt(1 / scale_factor).
        # scale_factor = 0.3  # positive reward within a summed distance of ~1.83
        scale_factor = 0.8  # positive reward within a summed distance of ~1.12

        # Position is a much more important factor, so it gets a higher scale than velocity.
        # The reward defines what it deems the "best" state to be, so we want to make sure
        # that the best state is determined mostly by the physical location and not
        # by keeping a really small velocity.
        scale_factor_vel = 0.001

        # Prevent the body velocity of the quadcopter from going too high.
        # This should make it smoother.
        reward = (
            1.0 - scale_factor *
            ((abs(self.sim.pose[:3] - self.target_pos)).sum()**2) -
            scale_factor_vel * (self.sim.find_body_velocity().sum()**2)
            # - scale_factor_vel * abs(self.sim.find_body_velocity().sum())
        )

        # # My task is moving from (0,0,0) to (0,10,0). We want to prevent the
        # # quadcopter from going too high in the air. We can simply subtract
        # # the sigmoid of the z position. If close to the ground, we will subtract close to 0.
        # # If too high it will subtract 1. Divide z position by 3 to allow for some height.
        # # Subtract 0.5 and multiply total by 2 to get values between 0 and 1
        # reward = (
        #     1.0
        #     - scale_factor * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        #     - 2 * (Task.sigmoid(self.sim.pose[2] / 3) - 0.5)
        # )
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
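A minimal usage sketch for the class above, assuming the PhysicsSim helper from the Udacity quadcopter project is available; the random rotor speeds are only a stand-in for a real agent's policy:

import numpy as np

# Hypothetical driver loop: hover task starting at the origin, target 10 m above it.
task = Task(init_pose=np.array([0., 0., 0., 0., 0., 0.]),
            target_pos=np.array([0., 0., 10.]))
state = task.reset()
total_reward, done = 0.0, False
while not done:
    # Sample rotor speeds uniformly from the declared action range (placeholder policy).
    rotor_speeds = np.random.uniform(task.action_low, task.action_high, task.action_size)
    state, reward, done = task.step(rotor_speeds)
    total_reward += reward
print("episode return:", total_reward)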
Example #2
import numpy as np

from physics_sim import PhysicsSim  # simulator helper from the Udacity quadcopter project


class TaskTakeOff:
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 
        self.init_position = init_pose[:3] if init_pose is not None else np.array([0., 0., 0.])
        self.orig_distances = abs(self.target_pos - self.init_position)
        #print("target_pos = {} init_position = {} orig_distances = {}".format(self.target_pos, self.init_position, self.orig_distances ))

    def get_reward_orig(self):
        """Uses current pose of sim to return reward."""
        reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward
    
    def get_reward_basic(self):
        """Uses current pose of sim to return reward."""
        #indices# X=0, Y=1, Z=2, PSI=3, THETA=4, PHI=5
        X = self.sim.pose[0]
        Y = self.sim.pose[1]
        Z = self.sim.pose[2]
        PSI   = self.sim.pose[3]
        THETA = self.sim.pose[4]
        PHI   = self.sim.pose[5] 
        #set rewards
        #positive reward for positive Z, penalties for X & Y motion
        #Euler angles and velocities are neglected, since their effects
        #show up in the (x,y,z) coordinates anyway and are penalized through those.
        reward =  0.0 + 1*(Z) - 0.3*(abs(X)+abs(Y))
        #End episode if max height is breached 
        done = False
        if self.sim.pose[2] >= self.target_pos[2]: # agent has crossed the target height
            #raise TypeError
            reward += 50.0  # bonus reward
            done = True
        #normalize the reward to avoid overflows in other computations such as gradients
        #Since we end the episode once the agent reaches a height of 10 and give it a bonus of 50,
        #the maximum positive reward is 60: reward of 10 at (0,0,10) plus the 50 bonus.
        #To normalize we take the minimum value to be -60.
        reward = (reward + 60) / 120  # (value - min)/(max - min), where (max - min) = 60 - (-60) = 120
        return reward, done
    
    def get_reward_complex(self):
        """Uses current pose of sim to return reward."""
        #indices of self.sim.pose --> X=0, Y=1, Z=2, PSI=3, THETA=4, PHI=5
        X = self.sim.pose[0]
        Y = self.sim.pose[1]
        Z = self.sim.pose[2]
        PSI   = self.sim.pose[3]
        THETA = self.sim.pose[4]
        PHI   = self.sim.pose[5] 
        body_velocity = self.sim.find_body_velocity()
        #print("body_velocity.type={}".format(type(body_velocity))
        #set rewards
        euler_angle_sum = abs(PSI)+abs(THETA)+abs(PHI)
        reward = 100.0
        reward -= 0.3 * (abs(X)+abs(Y)) #penalty for movement along x and y planes
        reward -= 0.5 * euler_angle_sum #penalty for euler angle movement
        return round(reward,2)
    
    def get_reward_complex2(self):
        """Uses current pose of sim to return reward."""
        #indices of self.sim.pose --> X=0, Y=1, Z=2, PSI=3, THETA=4, PHI=5
        X = self.sim.pose[0]
        Y = self.sim.pose[1]
        Z = self.sim.pose[2]
        PSI   = self.sim.pose[3]
        THETA = self.sim.pose[4]
        PHI   = self.sim.pose[5] 
        body_velocity = self.sim.find_body_velocity()
        #print("body_velocity.type={}".format(type(body_velocity))
        #set rewards 
        euler_angle_sum = abs(PSI)+abs(THETA)+abs(PHI)
        #positive reward for positive Z movement
        #penalty for movement along x and y planes
        reward =  0.0 + 1*(Z) - 0.3*(abs(X)+abs(Y))
        reward -= 0.5 * euler_angle_sum #penalty for euler angle movement
        return round(reward,2)
    
    #switch to select the particular get_reward variant used by step()
    #Note: step() unpacks a (reward, done) pair, which only get_reward_basic returns.
    get_reward = get_reward_basic

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            #check if episode is done due to desired height being reached
            delta_reward, done_height  = self.get_reward() 
            reward += delta_reward
            if done_height:
                done = done_height
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
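Since step() expects the (reward, done) pair that only get_reward_basic returns, trying one of the float-returning variants needs a thin adapter. A hypothetical sketch (not part of the original code), reusing the same height cutoff as get_reward_basic:

# Hypothetical adapter: wrap a float-returning reward variant so step() still
# receives the (reward, done) pair it expects. Assumes TaskTakeOff as defined above.
def _complex_reward_with_done(self):
    reward = self.get_reward_complex()
    done = self.sim.pose[2] >= self.target_pos[2]  # same height cutoff as get_reward_basic
    return reward, done

TaskTakeOff.get_reward = _complex_reward_with_done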
Example #3
import numpy as np

from physics_sim import PhysicsSim  # simulator helper from the Udacity quadcopter project


class Task:
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 100
        self.action_high = 800
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self, done):
        """Uses current pose of sim to return reward."""

        r0 = self.target_pos[2]
        r = self.sim.pose[2]
        rdot = self.sim.find_body_velocity()[2]

        if r > 0:
            reward = (1 - np.tanh(0.1 * np.abs(r - r0))) * 0.4
            reward += (1 - np.tanh(0.0625 * np.abs(rdot))) * 0.4
            reward += (1 -
                       np.tanh(0.1 * np.abs(self.sim.pose[3:].sum()))) * 0.1
            reward += (1 -
                       np.tanh(0.025 * np.abs(self.sim.angular_v.sum()))) * 0.1

            if done and (r < 2.5 * r0):
                reward = 50

        else:
            reward = -30

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            self.body_velocity_bstep = self.sim.find_body_velocity()
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(done)
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)

        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
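Each 1 - tanh(k * |error|) term in the reward above equals 1 at zero error and decays smoothly toward 0 as the error grows, and the 0.4/0.4/0.1/0.1 weights keep the per-step reward inside (0, 1]. A standalone sketch of that shaping, with illustrative height errors only:

import numpy as np

# Illustrative only: the weighted position term saturates toward 0 as the height error grows.
for height_error in (0.0, 5.0, 20.0):
    position_term = (1 - np.tanh(0.1 * abs(height_error))) * 0.4
    print("|r - r0| = {:5.1f} -> position term = {:.3f}".format(height_error, position_term))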
Example #4
import numpy as np

from physics_sim import PhysicsSim  # simulator helper; this example assumes a variant taking a single params object


class Task:
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, physicsSim_Params, target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """

        # Simulation
        self.sim = PhysicsSim(physicsSim_Params)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self, rotor_speeds):
        """Uses current pose of sim to return reward."""
        target_closeness_reward = 1. - .3 * (abs(self.sim.pose[:3] -
                                                 self.target_pos)).sum()
        # Severely penalize the quadcopter for having a 'body' velocity that's too large, regardless of direction.
        velocity_penalties = abs(self.sim.find_body_velocity()).sum()

        # per slack room, idea to minimize euler angles
        euler_bias = 5
        eulers_angle_penalty = abs(self.sim.pose[3:]).sum() - euler_bias

        # episode ends when hitting the ground or running out of time.  Want to fly as long as possible.
        flight_time = self.sim.time

        rotor_diff_penalty = np.std(
            rotor_speeds)  # penalize big differences in rotor speed!

        # Reward going higher, since that's what we want, but weight it less than the distance term.
        height_reward = self.sim.pose[2] / 100.

        total = (target_closeness_reward
                 + flight_time
                 - eulers_angle_penalty
                 - velocity_penalties
                 - rotor_diff_penalty
                 + height_reward)

        #print('rewards: ', target_closeness_reward, velocity_penalties, eulers_angle_penalty,
        #      flight_time, rotor_diff_penalty, total)
        #hyperbolic_activation = np.tanh(total)
        #if (hyperbolic_activation > -1.):
        #    print('Found rewards that crept above -1!: ', total, hyperbolic_activation)
        #return hyperbolic_activation
        return total

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(rotor_speeds)
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
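The commented-out np.tanh(total) variant would squash the raw sum of these heterogeneous terms into (-1, 1), which is consistent with the author's note about values that rarely crept above -1. A short standalone illustration with made-up totals:

import numpy as np

# Illustrative only: tanh bounds the reward, so large negative totals (big velocity or
# Euler-angle penalties) all saturate near -1 and become hard to tell apart.
for total in (-50.0, -5.0, 0.0, 2.0):
    print("total = {:6.1f} -> tanh(total) = {:+.4f}".format(total, np.tanh(total)))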