Example #1
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None, action_repeat=1):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
            action_repeat: number of simulation timesteps advanced per agent step (the action vector holds 4 rotor speeds per repeat)
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = action_repeat

        self.state_size = self.action_repeat * 6
        self.action_low = 1
        self.action_high = 900
        self.action_size = self.action_repeat * 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 10.
        if self.sim.pose[2] < (self.target_pos[2] - 3.): 
            reward += 2*(self.sim.pose[2] - self.sim.init_pose[2])
            reward -= (abs(self.sim.pose[:2] - self.target_pos[:2]) > 3.).sum()
        else: 
            reward += 10.
            reward -= 2*(abs(self.sim.pose[:3] - self.target_pos) > 3.).sum()
            reward -= np.int32(abs(self.sim.pose[2] - self.target_pos[2]) > 3.)
        
        if self.sim.done:
            if self.sim.time < self.sim.runtime: 
                reward -= 10
                
        reward = reward / 10
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for i in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds[(i*4):((i*4)+4)]) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, (reward / self.action_repeat), done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
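A minimal rollout sketch for the Task above, assuming the usual Udacity quadcopter project layout where PhysicsSim comes from physics_sim.py and the class is saved as task.py; the random-rotor-speed policy and file names are illustrative, not part of the original example.

import numpy as np

from task import Task  # assumes the Task class above is saved as task.py
# PhysicsSim is expected to be importable from the project's physics_sim module

task = Task(init_pose=np.array([0., 0., 1., 0., 0., 0.]),
            target_pos=np.array([0., 0., 10.]),
            action_repeat=1)

state = task.reset()
total_reward, done = 0., False
while not done:
    # random policy: one speed per rotor, drawn within the task's action bounds
    rotor_speeds = np.random.uniform(task.action_low, task.action_high,
                                     task.action_size)
    state, reward, done = task.step(rotor_speeds)
    total_reward += reward
print("episode return:", total_reward)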
Example #2
class TakeOff_Task():
    """TakeOff (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_size = 4
        self.action_low = 0
        self.action_high = 900

        # Goal: Takeoff the Quadcopter starting from land
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        # Keep the Euler angles stable by mapping each one through a sine function;
        # multiply the per-angle factors together, each factor lies between 0 and 1.

        optimal_reward = (1. - abs(math.sin(self.sim.pose[3])))
        optimal_reward *= (1. - abs(math.sin(self.sim.pose[4])))
        optimal_reward *= (1. - abs(math.sin(self.sim.pose[5])))

        # How far we are from the target
        delta = abs(self.sim.pose[:3] - self.target_pos[:3])

        # Euclidean distance: dot the delta with itself, then take the square root.
        distance = math.sqrt(np.dot(delta, delta))

        if (distance > 0.01):
            penalty = distance
        else:
            penalty = 0
        reward = 1. - penalty
        # Take into account Euler angles
        reward *= optimal_reward
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
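The three (1 - |sin(angle)|) terms above act as a single attitude multiplier in [0, 1] that shrinks the reward as any Euler angle tips toward 90 degrees. A standalone sketch of that factor (the helper name and sample angles are illustrative):

import math

def attitude_factor(phi, theta, psi):
    """Product of (1 - |sin(angle)|) terms, as in TakeOff_Task.get_reward:
    1.0 when perfectly level, approaching 0.0 as any angle nears +/- 90 degrees."""
    factor = 1.0
    for angle in (phi, theta, psi):
        factor *= 1.0 - abs(math.sin(angle))
    return factor

print(attitude_factor(0.0, 0.0, 0.0))          # 1.0  (level flight)
print(attitude_factor(0.3, 0.0, 0.0))          # ~0.70 (mild roll)
print(attitude_factor(math.pi / 2, 0.0, 0.0))  # 0.0  (a 90-degree roll wipes out the reward)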
Example #3
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1. - .001 * (abs(self.sim.pose[:3] - self.target_pos)).sum()

        #distance = np.sqrt(((self.sim.pose[:3] - self.target_pos)**2).sum())

        #if distance < .15:
        #reward += 0.03

        # bonus for being close to the target height (z position)

        if (abs(self.sim.pose[2] - self.target_pos[2])) < 0.20:
            reward += 0.03

        # bonuses for being close to the target x and y positions

        if (abs(self.sim.pose[0] - self.target_pos[0])) < 0.20:
            reward += 0.03

        if (abs(self.sim.pose[1] - self.target_pos[1])) < 0.20:
            reward += 0.03

        if self.sim.time < self.sim.runtime and self.sim.done:
            reward -= 15

        return reward

    def step(self, rotor_speeds):

        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):

        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #4
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 1

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = -1

        if self.sim.pose[2] >= self.target_pos[2]:
            reward = 50
        elif self.sim.pose[2] <= 0:
            reward = -50
        else:
            #reward = -abs(self.sim.pose[2] - self.target_pos[2])
            #reward = self.sim.pose[2]

            reward += self.sim.v[2]
            reward -= 0.3 * abs(self.sim.pose[:2]).sum()

            #reward -= 0.2 * abs(self.sim.pose[3:]).sum()
            #print(self.sim.pose, self.sim.v)
            #reward -= 0.1 * abs(self.sim.pose[:2]).sum()
        #reward += max(self.sim.v[2], 5) if self.sim.v[2] > 0 else self.sim.v[2]
        #reward -= 0.2 * abs(self.sim.pose[3:]).sum()

        #print(self.sim.pose, self.sim.v)

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            if self.sim.pose[2] >= self.target_pos[2]:
                done = True
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #5
class Task():
    def __init__(self,
                 target_pos,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 pos_noise=None,
                 ang_noise=None,
                 vel_noise=None,
                 ang_vel_noise=None):

        self.target_pos = target_pos
        self.pos_noise = pos_noise
        self.ang_noise = ang_noise
        self.vel_noise = vel_noise
        self.ang_vel_noise = ang_vel_noise

        #These are going to get changed a lot
        hover = 400

        #Set low/high for actions
        self.action_high = 1.2 * hover
        self.action_low = 0.99 * hover

        self.action_size = 1

        #Init the velocities to blank defaults if not given to us
        if (init_velocities is None):
            init_velocities = np.array([0.0, 0.0, 0.0])
        if (init_angle_velocities is None):
            init_angle_velocities = np.array([0.0, 0.0, 0.0])

        # Here we create a physics simulation for the task
        # print("start", init_pose, init_velocities, init_angle_velocities)
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)

        self.state_size = len(self.get_state())

        #Constants for mapping a tanh action in [-1, 1] onto [action_low, action_high]
        self.action_b = (self.action_high + self.action_low) / 2.0
        self.action_m = (self.action_high - self.action_low) / 2.0

    def get_reward(self):
        loss = (self.sim.pose[2] - self.target_pos[2])
        loss = loss**2
        loss += 0.1 * (self.sim.linear_accel[2]**2)

        delta = 0.5
        reward_max = 1
        reward_min = 0

        #Calculate the reward via a pseudo-Huber loss
        reward = np.maximum(
            reward_max - delta * delta * (np.sqrt(1 + (loss / delta)**2) - 1),
            reward_min)

        return reward

    def normalize_angles(self, angles):
        # Wrap angles greater than pi down into the (-pi, pi] range
        normalized_angles = np.copy(angles)
        for i in range(len(normalized_angles)):
            while normalized_angles[i] > np.pi:
                normalized_angles[i] -= 2 * np.pi
        return normalized_angles

    def get_state(self):
        position_error = (self.sim.pose[:3] - self.target_pos)

        return np.array(
            [position_error[2], self.sim.v[2], self.sim.linear_accel[2]])

    def convert_action(self, action):
        #print (action, self.action_m, self.action_b)
        return (action[0] * self.action_m) + self.action_b

    def step(self, action):
        speed_of_rotors = self.convert_action(action)
        is_done = self.sim.next_timestep(speed_of_rotors * np.ones(4))
        next_state = self.get_state()
        reward = self.get_reward()

        if reward <= 0:
            is_done = True

        return next_state, reward, is_done

    def reset(self):
        self.sim.reset()

        #############################################
        if self.action_size == 1:
            if self.pos_noise is not None or self.ang_noise is not None:
                new_random_pose = np.copy(self.sim.init_pose)
                if self.pos_noise is not None and self.pos_noise > 0:
                    new_random_pose[2] += np.random.normal(
                        0.0, self.pos_noise, 1)

                self.sim.pose = np.copy(new_random_pose)

            if self.vel_noise is not None:
                new_velocity_random = np.copy(self.sim.init_velocities)
                new_velocity_random[2] += np.random.normal(
                    0.0, self.vel_noise, 1)
                self.sim.v = np.copy(new_velocity_random)
            return self.get_state()
        #############################################

        #############################################
        if self.pos_noise is not None or self.ang_noise is not None:
            new_random_pose = np.copy(self.sim.init_pose)
            if self.pos_noise is not None and self.pos_noise > 0:
                new_random_pose[:3] += np.random.normal(0.0, self.pos_noise, 3)
            if self.ang_noise is not None and self.ang_noise > 0:
                new_random_pose[3:] += np.random.normal(0.0, self.ang_noise, 3)

            self.sim.pose = np.copy(new_random_pose)

        #############################################

        #############################################
        if self.vel_noise is not None:
            new_velocity_random = np.copy(self.sim.init_velocities)
            new_velocity_random += np.random.normal(0.0, self.vel_noise, 3)
            self.sim.v = np.copy(new_velocity_random)
        #############################################

        #############################################
        if self.ang_vel_noise is not None:
            angle_velocity_random = np.copy(self.sim.init_angle_velocities)
            angle_velocity_random += np.random.normal(0.0, self.ang_vel_noise,
                                                      3)
            self.sim.angular_v = np.copy(angle_velocity_random)
        #############################################

        return self.get_state()
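The reward in this example is a clipped pseudo-Huber shaping of the squared height error plus a small vertical-acceleration penalty. A standalone sketch of the same formula, evaluated at a few height errors with the acceleration term set to zero (the helper name and sample values are illustrative):

import numpy as np

def pseudo_huber_reward(z_error, z_accel=0.0, delta=0.5,
                        reward_max=1.0, reward_min=0.0):
    """Same shaping as Task.get_reward above: squared height error plus
    0.1 * accel^2, pushed through a pseudo-Huber curve and clipped at reward_min."""
    loss = z_error ** 2 + 0.1 * z_accel ** 2
    return np.maximum(
        reward_max - delta * delta * (np.sqrt(1 + (loss / delta) ** 2) - 1),
        reward_min)

for err in (0.0, 0.5, 1.0, 1.5, 2.0):
    print("z error {:.1f} m -> reward {:.3f}".format(err, pseudo_huber_reward(err)))

Because step() above ends the episode once the reward drops to 0, this shaping effectively terminates runs whose height error grows beyond roughly 1.6 m (with zero vertical acceleration).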
Example #6
class Takeoff():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

        self.start_pos = init_pose[:3] if init_pose is not None else np.array(
            [0., 0., 10.])
        self.start_xyz_dist = np.linalg.norm(self.target_pos - self.start_pos)
        self.start_z_dist = np.linalg.norm(
            np.array([0., 0., self.target_pos[2]]) -
            np.array([0., 0., self.start_pos[2]]))

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        curr_pos = self.sim.pose[:3]
        curr_z = np.array([0., 0., curr_pos[2]])
        target_z = np.array([0., 0., self.target_pos[2]])
        curr_z_dist = np.linalg.norm(target_z - curr_z)
        z_ratio = -(curr_z_dist / self.start_z_dist)
        z_reward = (z_ratio *
                    0.1) if curr_z_dist < self.start_z_dist else z_ratio
        speed_reward = min(0.005, max(0., (self.sim.v[2] - 10.) / 100.))
        reward = z_reward + speed_reward
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward / self.action_repeat, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        self.prev_pos = np.copy(self.start_pos)
        return state
Example #7
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Capture init_pose for use in reward function
        self.init_pose = init_pose if init_pose is not None else np.array(
            [0., 0., 0., 0., 0., 0.])

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Capture provided reward function
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        """Reward design philosophy"""
        # The reward should increase as the copter gets closer to its target position (target_pos)
        # To do this, normalize the remaining distance by the total distance,
        # then subtract this value from 1, then multiply by a scaling factor
        # Punish rotations and x and y shifts from zero by applying a penalty
        """Calculating the reward"""
        # Calculate the distance between the current position(self.sim.pose[:3]) and target position
        distance_remaining = np.sqrt(
            np.sum([(x - y)**2
                    for x, y in zip(self.sim.pose[:3], self.target_pos)]))
        # Calculate the distance between the starting position and target position
        # This will be used to normalize the reward
        distance_total = np.sqrt(
            np.sum([(x - y)**2
                    for x, y in zip(self.init_pose[:3], self.target_pos)]))
        proximity = 1 - distance_remaining / distance_total
        proximity_reward = 10 * proximity if proximity > 0 else 0
        # Punish rotation
        rotation_punish = sum(
            [1 if ii > 0.03 else 0 for ii in abs(self.sim.pose[3:])])
        # Punish shift
        shift_punish = sum(
            [1 if ii > 1 else 0 for ii in abs(self.sim.pose[:2])])
        reward = proximity_reward - rotation_punish - shift_punish
        reward = reward / 3  # rescale due to x3 action repeat
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
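The two list-comprehension distances in get_reward above are plain Euclidean norms. A vectorized sketch of the same proximity term using np.linalg.norm, for reference; the function name is illustrative, not part of the original task:

import numpy as np

def proximity_reward(pose, init_pose, target_pos, scale=10.0):
    """Equivalent to the proximity term in get_reward above: 1 minus the
    remaining/total distance ratio, clipped at 0 and scaled.
    Assumes the start and target positions differ (otherwise distance_total
    is zero, as in the original)."""
    distance_remaining = np.linalg.norm(pose[:3] - target_pos)
    distance_total = np.linalg.norm(init_pose[:3] - target_pos)
    proximity = 1.0 - distance_remaining / distance_total
    return scale * proximity if proximity > 0 else 0.0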
Example #8
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])
        self.old_pos = self.sim.pose

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # position
        #         reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()

        #         reward = -min(abs(self.target_pos[2] - self.sim.pose[2]), 20.0)
        #         if self.sim.pose[2] >= self.target_pos[2] :
        #             reward += 10.0

        #         # horizontal (xy-plane) distance
        xy_distance = np.sqrt(
            ((self.target_pos[:2] - self.sim.pose[:2])**2).sum())
        #         horizontal (xy-plane) velocity
        #         xy_velocity = np.sqrt((self.sim.v[:2]**2).sum())
        # vertical distance
        z_distance = abs(self.target_pos[2] - self.sim.pose[2])
        # vertical velocity
        z_velocity = self.sim.v[2]
        #         # distance moved in the xy-plane since the previous position
        #         xy_move = np.sqrt(((self.old_pos[:2] - self.sim.pose[:2])**2).sum())
        # vertical distance moved since the previous position
        z_move = self.sim.pose[2] - self.old_pos[2]

        # angular velocity
        xyz_angular_v = (abs(self.sim.angular_v[:3])).sum()

        reward = -2.0 * z_distance + 4.0 * z_velocity - 4.0 * xyz_angular_v - xy_distance

        #         reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            self.old_pos = self.sim.pose
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.old_pos = None
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #9
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 5

        self.reset()

        self.state_size = self.state.size
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.t = 0
        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self, done, rotor_speeds, in_bounds):
        """Uses current pose of sim to return reward."""

        penalty = 10

        # takeoff task
        reward = 1 - penalty * (not in_bounds) - (1e-2) * (
            np.linalg.norm(self.sim.pose[:3] - self.target_pos)
        )  #- (1e-4)*(np.sum(self.sim.pose[3:]**2)+np.sum(self.sim.angular_v**2))

        # float task
        # reward = 1 - penalty*(not in_bounds) - (1e-4)*(np.sum(self.sim.pose[3:]**2)+np.sum(self.sim.angular_v**2))

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        #pose_all = []
        for _ in range(self.action_repeat):
            self.t += 1
            # update the sim pose and velocities
            done = self.sim.next_timestep(rotor_speeds)
            reward += self.get_reward(done, rotor_speeds, self.sim.in_bounds)
            #pose_all.append(self.sim.pose)
        #next_state = np.concatenate(pose_all)
        next_state = np.concatenate(
            (self.sim.pose, self.sim.v, self.sim.angular_v))
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.t = 0
        self.sim.reset()
        #self.state = np.concatenate([self.sim.pose] * self.action_repeat)
        self.state = np.concatenate(
            (self.sim.pose, self.sim.v, self.sim.angular_v))
        return self.state
Example #10
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # last position
        self.pose_last = self.sim.pose[:3]

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #reward = 1.-0.3*abs(self.sim.pose[:3] - self.target_pos).sum()

        #distance = np.linalg.norm(self.target_pos - self.sim.pose[:3])
        #distance_last = np.linalg.norm(self.target_pos - self.pose_last)
        #reward = 0. + (distance_last - distance)

        #pose_diff = self.target_pos - self.sim.pose[:3]
        #e_R = pose_diff/np.linalg.norm(pose_diff)
        #e_v = self.sim.v/np.linalg.norm(self.sim.v)
        #reward =  np.dot(self.sim.v, e_R)

        #reward = 1.-0.003*np.linalg.norm(self.target_pos - self.sim.pose[:3])
        #reward = np.tanh(1.-.003*abs(self.sim.pose[:3] - self.target_pos) ).sum()
        #reward = np.tanh(1.-.003*abs(self.sim.pose[:3] - self.target_pos).sum() )
        reward_xy = np.tanh(
            1. - .01 * abs(self.sim.pose[:2] - self.target_pos[:2])).sum()
        reward_z = np.tanh(1. -
                           .005 * abs(self.sim.pose[2] - self.target_pos[2]))
        reward = reward_xy + reward_z

        #reward = np.tanh(1.-.009*np.linalg.norm(self.sim.pose[:3] - self.target_pos) )
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            self.pose_last = self.sim.pose[:3]  # record last position
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #11
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 1
        self._states_to_be_used = ["pose", "linear_accel", "angular_v"]

        self.state_size = self.action_repeat * sum(
            [len(getattr(self.sim, x)) for x in self._states_to_be_used])
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_states(self):
        return [getattr(self.sim, x) for x in self._states_to_be_used]

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # position_score = max(10.-.3*np.linalg.norm(self.sim.pose[:3] - self.target_pos), -10)
        position_score = 10. - .3 * np.linalg.norm(self.sim.pose[:3] -
                                                   self.target_pos)**2
        # angular_stationary_score = max(10.-.3*np.linalg.norm(self.sim.angular_v), -10)
        angular_stationary_score = 10. - .3 * np.linalg.norm(
            self.sim.angular_v)**2
        reward = (position_score + .1 * angular_stationary_score) / 10.
        # reward = np.sqrt(np.abs(reward)) * np.sign(reward)
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(np.concatenate(self.get_states()))
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate(self.get_states() * self.action_repeat)
        return state
Example #12
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 300
        self.action_high = 500
        self.action_size = 4

        # Goal
        if target_pos is None:
            print("Setting default init pose")
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #         reward = -min(abs(self.target_pos[2] - self.sim.pose[2]), 20.0)  # reward = zero for matching target z, -ve as you go farther, upto -20
        #         z_reward = np.tanh(1 - 0.003*(abs(self.sim.pose[2] - self.target_pos[2]))).sum()
        #         xy_reward = np.tanh(1 - 0.009*(abs(self.sim.pose[:2] - self.target_pos[:2]))).sum()
        #         reward = z_reward + xy_reward
        reward = 0.0
        sim = self.sim
        reward += 0.01 * sim.pose[2]
        reward -= 0.02 * (abs(sim.pose[:2])).sum()

        reward -= 0.025 * (abs(sim.pose[5]))

        reward += 0.005 * sim.v[2]
        reward -= 0.01 * (abs(sim.v[:2])).sum()

        reward -= 0.025 * (abs(self.sim.angular_v[:3])).sum()
        if self.sim.angular_v[2] == 0 or self.sim.pose[5] == 0:
            reward += 100

        return reward / 100

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #13
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 410
        self.action_high = 420
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self, done):
        """Uses current pose of sim to return reward."""
        down_penalty = 0
        run_time_penalty = 0
        vertical_coeff = 0.05
        if self.sim.pose[2] < 0.5:
            down_penalty = -10
        vert_pos = np.linalg.norm(self.sim.pose[2] - self.target_pos[2])
        if vert_pos < 2:
            vertical_coeff = 0.01
        rot_pos = abs(self.sim.pose[3:]).sum()
        x_y_pos = np.linalg.norm(self.sim.pose[:2] - self.target_pos[:2])
        return 1 - .005 * (vert_pos) - .001 * (x_y_pos) - .001 * (
            rot_pos) + down_penalty

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(done)
            pose_all.append(self.sim.pose)
#         if self.sim.pose[2] > 10.0:
#                 done = True
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #14
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()

        return reward

    def get_reward_enanched(self):
        """Uses current pose of sim to return reward."""

        # penalty grows with angular velocity, so we maintain stability
        stability_score = 100. - .4 * abs(self.sim.angular_v[:3]).sum()

        # the greater the distance, the greater the penalty
        position_score = 100. - .6 * np.linalg.norm(self.sim.pose[:3] -
                                                    self.target_pos)**2

        reward = stability_score + position_score

        return reward

    def step(self, rotor_speeds, enanched=False):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            if enanched:
                reward += self.get_reward_enanched()
            else:
                reward += self.get_reward()
            pose_all.append(self.sim.pose)

        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #15
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6  # matches the 6-element pose appended in step()/reset()
        self.action_low = 300
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        [[reward]] = cosine_similarity(self.sim.pose[:3].reshape(1, -1),
                                       self.target_pos.reshape(1, -1))
        # print('reward', reward)
        # reward = np.linalg.norm(self.sim.pose[:3] - self.target_pos)
        # if reward < 4:
        #     reward
        return reward  #1. - 0.3 * abs(self.sim.pose[:3] - self.target_pos).sum()

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        # print('next_state', next_state)
        # print('angular_accels', self.sim.angular_accels)
        # print('angular_velocity', self.sim.angular_accels)

        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
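cosine_similarity is not imported in this snippet; it is presumably sklearn.metrics.pairwise.cosine_similarity, which explains the reshape(1, -1) calls and the [[reward]] unpacking. If that assumption holds, the same scalar can be computed with plain NumPy:

import numpy as np

def cosine_reward(pose, target_pos, eps=1e-8):
    """Cosine of the angle between the current position vector and the target
    vector: the scalar get_reward above unpacks from cosine_similarity
    (assuming that helper is sklearn's)."""
    p, t = pose[:3], target_pos
    return float(np.dot(p, t) / (np.linalg.norm(p) * np.linalg.norm(t) + eps))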
Example #16
class CustomTask():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = np.tanh(1.-.01*(abs(self.sim.pose[:3] - self.target_pos))).sum()
     
        
        loss_x = abs(self.sim.pose[0] - self.target_pos[0])**2
        loss_y = abs(self.sim.pose[1] - self.target_pos[1])**2
        loss_z = abs(self.sim.pose[2] - self.target_pos[2])**2
        
        threshold = 0.10
        
        distance = np.sqrt(loss_x + loss_y + loss_z)
        if distance < threshold:
            reward += 0.2
            
        if self.sim.time > self.sim.runtime:  # agent has run out of time
            reward -= 10.0  # extra penalty
            done = True
            
        if self.sim.pose[2] >= self.target_pos[2]:  # agent has crossed the target height
            reward += 10.0  # bonus reward
            done = True

        return np.tanh(reward)

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
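Note that the two done = True assignments inside get_reward above only bind a local variable, so they never actually end the episode. Below is a sketch of one way to make those terminal conditions effective, assuming that was the intent: record them on the instance and check the flag in step(), as Example #18 does with self.success. The reward body is abbreviated to the parts relevant to termination.

import numpy as np

class CustomTaskWithTermination(CustomTask):
    """Sketch only: same task as CustomTask above, but the terminal
    conditions actually end the episode."""

    def get_reward(self):
        reward = np.tanh(1. - .01 * (abs(self.sim.pose[:3] - self.target_pos))).sum()
        if self.sim.time > self.sim.runtime:          # ran out of time
            reward -= 10.0
            self.episode_over = True                  # instance flag, not a local
        if self.sim.pose[2] >= self.target_pos[2]:    # crossed the target height
            reward += 10.0
            self.episode_over = True
        return np.tanh(reward)

    def step(self, rotor_speeds):
        reward, pose_all = 0, []
        self.episode_over = False
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        return np.concatenate(pose_all), reward, done or self.episode_over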
Example #17
class Takeoff:
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, debug=False):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        self.sim = PhysicsSim(init_pose, None, None, 5.0)
        self.action_repeat = 1

        self.state_size = self.action_repeat * 5
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.debug = debug

    def normalize_angles(self, euler_angles):
        """Normalize Euler angles from 0,2*pi to -1,1 range."""
        angles = copy.copy(euler_angles)

        for ii in range(len(angles)):
            angles[ii] = angles[ii] if (angles[ii] < np.pi) else (angles[ii] -
                                                                  2 * np.pi)
            angles[ii] /= np.pi
        return angles

    def get_reward(self, rotor_speeds):
        """Uses current pose of sim to return reward."""

        # Normalize roll and pitch to [-1,1]
        norm_eulers = self.normalize_angles(self.sim.pose[3:5 + 1])
        if self.debug:
            # Print latest position and normalized Euler angles
            print(
                "(x',y',z')=({:6.2f},{:6.2f},{:6.2f}), (phi,theta,ksi)=({:6.2f},{:6.2f},{:6.2f})"
                .format(*self.sim.v[:3], *norm_eulers))
            sys.stdout.flush()

        # Cost of excessive roll or pitch. Normalized to [-5,0].
        attitude_cost = -(5 / 2) * sum(abs(norm_eulers[:2]))
        # Cost of lateral movement; computed for debugging only, not added to the reward.
        lateral_cost = -sum(abs(self.sim.v[0:2]))
        # Reward for z-speed up minus cost of rolling or pitching
        reward = self.sim.v[2] + attitude_cost
        #reward = self.sim.v[2] + 1./np.std(rotor_speeds)+ lateral_cost + attitude_cost

        # Reward for this step + debugging info
        return reward, lateral_cost, attitude_cost

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = np.zeros(3)
        pose_all = []

        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += np.array(self.get_reward(rotor_speeds))
            #pose_all.append(self.sim.pose)
            pose_all.append(np.hstack([self.sim.v, self.sim.pose[3:5]]))

        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        #state = np.concatenate([self.sim.pose] * self.action_repeat)
        state = np.concatenate([np.hstack([self.sim.v, self.sim.pose[3:5]])] *
                               self.action_repeat)
        return state
Example #18
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        # Added
        self.success = False

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Check if it reaches target height
        if self.sim.pose[2] >= self.target_pos[2]:
            self.success = True
            return 1

        # reward grows as the copter gets closer to the target height
        reward = 1. - min(self.sim.pose[2] - self.target_pos[2], 100) / 100
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        # Modified to teach quadcopter to takeoff to a target height
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)

        # agent will receive a reward of 10 if it reaches the target height and penalty of -10 if it doesn't
        if done:
            if self.success:
                reward += 10
            else:
                reward -= 10

        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        # reset status
        self.success = False
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #19
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 obs='pos',
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            obs: 'pos', 'pos_v', 'pos_v_ang'
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # make a sim object
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.obs = obs
        self.action_repeat = 3

        # task's observation size
        if self.obs == 'pos':
            s_len = 6  # position [x, y, z, phi, theta, psi]
        elif self.obs == 'pos_v':
            s_len = 9  # position + velocity [vx, vy, vz]
        elif self.obs == 'pos_v_ang':
            s_len = 12  # position + velocity + angular velocity [vphi, vtheta, vpsi]
        self.state_size = self.action_repeat * s_len
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4  # 4 rotor speeds

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """
        Reward function for task "takeoff"
        """
        ## difference vector
        delta = self.sim.pose[:3] - self.target_pos

        ## Euclidean distance to the target
        dtotal = np.sqrt(np.dot(delta, delta))

        ## abs distance in z (height) to the target
        dz = np.abs(delta[2])

        ## reward dtotal (300 m is the env bound of the sim)
        reward = distance_reward(dtotal, dmax=np.sqrt(300**2 + 300**2 + 300**2)) \
                * time_reward(self.sim.time, tmax=self.sim.runtime)

        ## success condition: height
        if dz < 1:
            self.sim.done = True  # end the episode (nothing more to learn once the target is reached)
            reward += 100  # large terminal bonus (avoids circling the target just to keep collecting positive reward)

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        stmp = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            if self.obs == 'pos':
                s = np.array(self.sim.pose)  # shape (6,)
            elif self.obs == 'pos_v':
                s = np.concatenate((self.sim.pose, self.sim.v))  # shape (9,)
            elif self.obs == 'pos_v_ang':
                s = np.concatenate((self.sim.pose, self.sim.v,
                                    self.sim.angular_v))  # shape (12,)
            stmp.append(s)
        next_state = np.concatenate(stmp)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        if self.obs == 'pos':
            s = np.array(self.sim.pose)  # shape (6,)
        elif self.obs == 'pos_v':
            s = np.concatenate((self.sim.pose, self.sim.v))  # shape (9,)
        elif self.obs == 'pos_v_ang':
            s = np.concatenate(
                (self.sim.pose, self.sim.v, self.sim.angular_v))  # shape (12,)
        state = np.concatenate([s] * self.action_repeat)
        return state
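The reward above calls distance_reward and time_reward, which are defined elsewhere in the original project. A minimal sketch of what such shaping helpers could look like (assumed signatures and behavior, not the author's exact implementation):

import numpy as np

def distance_reward(d, dmax):
    """Shaping term in [0, 1]: 1 at the target, decaying linearly with distance (assumption)."""
    return float(np.clip(1.0 - d / dmax, 0.0, 1.0))

def time_reward(t, tmax):
    """Shaping term in [0, 1]: largest early in the episode, 0 at the time limit (assumption)."""
    return float(np.clip(1.0 - t / tmax, 0.0, 1.0))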
Beispiel #20
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal (this task only tracks the target height in z)
        if target_pos is None:
            print("Setting default target height")
            self.target_pos = 10.0
        else:
            self.target_pos = target_pos[2]

        self.max_duration = 10.0  # sec

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        height_error = self.target_pos - self.sim.pose[2]

        if height_error < 0:  # above the target height
            if height_error > -3:
                reward = np.tanh(1 - 0.0005 * abs(height_error)) / 5
            else:
                reward = np.tanh(1 - 0.0005 * abs(height_error)) * 2
        else:  # at or below the target height
            reward = np.tanh(1 - 0.0005 * abs(height_error))

        return reward


    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
            if done:
                # reward += 10

                if self.sim.pose[2] >= self.target_pos:  # agent has crossed the target height
                    reward += 7.0  # bonus reward

                if self.sim.time > self.max_duration:  # agent has run out of time
                    reward -= 10.0  # extra penalty
                if self.sim.time < self.max_duration:  # episode ended before the time limit
                    reward += 3.0  # bonus reward

        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
Beispiel #21
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])
        self.task_success = False

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        return -1.0 * abs(self.sim.pose[2] - self.target_pos[2])

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()

            # Bonus / penalty for task completion / failure
            epsilon = 2.0
            diff = abs(self.sim.pose[2] - self.target_pos[2])
            if diff <= epsilon:
                reward += 1000
                self.task_success = True
            elif done:
                reward -= 1000

            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)

        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.task_success = False
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
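For reference, a minimal sketch of how these Task classes are typically driven during training; the random rotor speeds below are only a stand-in for a real agent's action and are meant to illustrate the reset/step loop:

import numpy as np

task = Task(target_pos=np.array([0., 0., 10.]))
state = task.reset()
done = False
total_reward = 0.0
while not done:
    # a trained agent would map state -> rotor_speeds; random speeds just exercise the interface
    rotor_speeds = np.random.uniform(task.action_low, task.action_high, task.action_size)
    state, reward, done = task.step(rotor_speeds)
    total_reward += reward
print(total_reward)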
Beispiel #22
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pose=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pose: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.init_z = self.sim.init_pose[2]

        # Goal
        self.target_pose = target_pose if target_pose is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        """Takeoff"""
        # punish_x = np.tanh(abs(self.sim.pose[0] - self.target_pose[0]))
        #punish_y = np.tanh(abs(self.sim.pose[1] - self.target_pose[1]))

        reward_z = 3 * np.tanh(self.sim.pose[2] - self.init_z)

        punish_rot1 = np.tanh(abs(self.sim.pose[3]))
        punish_rot2 = np.tanh(abs(self.sim.pose[4]))
        punish_rot3 = np.tanh(abs(self.sim.pose[5]))

        reward = reward_z - punish_rot1 - punish_rot2 - punish_rot3

        dist = np.linalg.norm(self.target_pose[:3] - self.sim.pose[:3])

        if dist < 3:
            reward += 10 * np.tanh(dist)
        else:
            reward -= np.tanh(dist)

        reward -= np.tanh(np.linalg.norm(self.sim.v))
        reward -= np.tanh(np.linalg.norm(self.sim.angular_v))

        if self.sim.v[2] > 0:
            reward += np.tanh(self.sim.v[2])

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #23
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward_new(self):
        """Uses current pose of sim to return reward."""

        #return reward
        reward = 0.0

        # Reward positive velocity along z-axis
        reward += self.sim.v[2]

        # Reward positions close to target along z-axis
        reward -= (abs(self.sim.pose[2] - self.target_pos[2])) / 2.0

        # A lower sensitivity towards drifting in the xy-plane
        reward -= (abs(self.sim.pose[:2] - self.target_pos[:2])).sum() / 4.0

        reward -= (abs(self.sim.angular_v[:3])).sum()

        return reward

    def step_new(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward_new()
            pose_all.append(self.sim.pose)

            if (self.sim.pose[2] >= self.target_pos[2]):
                reward += 100
                done = True
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class HoverTask(Task):
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None,
        init_angle_velocities=None, min_height=1., reward_weights = None, runtime=5.):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            min_height: minimum height to maintain in meters
            runtime: time limit for each episode in seconds
        """
        # Simulation
        self.init_pose = init_pose if init_pose is not None else np.array([0.,0.,10.,0.,0.,0.])
        self.init_velocities = init_velocities if init_velocities is not None else np.zeros(3)
        self.init_angle_velocities = init_angle_velocities if init_angle_velocities is not None else np.zeros(3)
        self.sim = PhysicsSim(self.init_pose, self.init_velocities, self.init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 12 # 6 pose + 6 velocity

        self.action_low = 0    # Min RPM value
        self.action_high = 900 # Max RPM value
        self.action_size = 4   # Num actions (4 rotors)

        # Goal
        self.target_pos = init_pose[0:3] if init_pose is not None else np.array([0., 0., 10.])
        self.reward_weights = reward_weights if reward_weights is not None else np.array([10.,0.,-2.,-1.])

    # Helpful for debugging
    # def get_reward_components(self):
    #     norm = lambda a: np.sum(np.square(a))
    #     return 1, norm(self.sim.pose[0:3]-self.target_pos), norm(self.sim.v), norm(self.sim.angular_v)

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        height_diff = abs(self.sim.pose[2] - self.init_pose[2])
        vert_v = abs(self.sim.v[2])
        spin_v = np.sqrt(np.sum(np.square(self.sim.angular_v)))
        out_of_bounds = np.any(np.concatenate(
                    (np.less_equal(self.sim.pose[:3], self.sim.lower_bounds),
                    np.greater_equal(self.sim.pose[:3], self.sim.upper_bounds)
                )))
        oob_penalty = 100 if out_of_bounds else 0  # computed but not applied to the reward below
        reward = 1. / (1 + np.exp(-height_diff)) + 1. / (1 + np.exp(-vert_v / 10.)) - spin_v / 100.
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []

        # Clip rotor speeds to min and max
        rotor_speeds = np.maximum(rotor_speeds, self.action_low*np.ones(self.action_size))
        rotor_speeds = np.minimum(rotor_speeds, self.action_high*np.ones(self.action_size))

        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(np.concatenate((self.sim.pose, self.sim.v, self.sim.angular_v))) # save pose, v and angular_v
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([np.concatenate((self.sim.pose, self.sim.v, self.sim.angular_v))] * self.action_repeat)
        return state
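A quick numeric check of HoverTask's reward shape (illustration only; the helper below simply re-evaluates the same expression as get_reward for a few hand-picked values):

import numpy as np

def hover_reward(height_diff, vert_v, spin_v):
    # same expression as HoverTask.get_reward, pulled out for a standalone sanity check
    return 1. / (1 + np.exp(-height_diff)) + 1. / (1 + np.exp(-vert_v / 10.)) - spin_v / 100.

print(hover_reward(0.0, 0.0, 0.0))   # ~1.0: both sigmoid terms start at 0.5
print(hover_reward(5.0, 2.0, 1.0))   # ~1.53: larger height change and vertical speed raise it, spin subtracts a little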
Beispiel #25
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 0.

        ## Add the vertical velocity to the reward: climbing in the z-direction increases
        ## the reward, while descending decreases it.
        reward += self.sim.v[2]

        ## Check x and y position movement
        pos_xy = 1 - 2 * (abs(self.sim.pose[:2] - self.target_pos[:2])).sum()
        if pos_xy < -1:  ## penalty if the x/y position has drifted too far
            reward -= 1
        elif pos_xy == 1:  ## reward for staying exactly on the vertical through x0, y0
            reward += 2
        else:  ## small x/y drift still counts as holding position
            reward += 1

        ## Check angular velocity change
        av = 1 - 2 * (abs(self.sim.angular_v[:3])).sum()
        ## If the angular velocities are large enough to disrupt the quadcopter's vertical ascent,
        ## give a negative reward; otherwise a positive reward.
        if av < -1:
            reward -= 1
        else:
            reward += 1

        ## Check the vertical distance between the current position and the target position.
        z_distance = 1 - 2 * (abs(self.sim.pose[2] - self.target_pos[2]))
        if z_distance < -1:  ## more than 1 m from the target height: negative reward
            reward -= 1
        elif z_distance > 0:  ## within 0.5 m of the target height: big positive reward
            reward += 5
        else:  ## between 0.5 m and 1 m away: smaller positive reward
            reward += 2

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)

            if (self.sim.pose[2] > self.target_pos[2]):
                reward += 200
                done = True

        next_state = np.concatenate(pose_all)

        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #26
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 1
        self.action_high = 900
        self.action_size = 4
        self.runtime = runtime
        # Ideas to try:
        # - limit the action space and state space to only the vertical axis
        # - set the rotor speeds equal to each other if possible
        # - tweak the neural network in model.py

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def distance_to_target(self, current, target):
        """Euclidean distance between the current (x, y, z) position and the target."""
        import math
        d_1 = current[0] - target[0]
        d_2 = current[1] - target[1]
        d_3 = current[2] - target[2]
        deviation = math.sqrt((d_1)**2 + (d_2)**2 + (d_3)**2)
        return deviation

    def avg_rotor_speed(self, rotor_speeds):
        """Deviation of the four rotor speeds from their mean (despite the name, not the average itself)."""
        import math
        average = np.average(rotor_speeds)
        s_0 = rotor_speeds[0] - average
        s_1 = rotor_speeds[1] - average
        s_2 = rotor_speeds[2] - average
        s_3 = rotor_speeds[3] - average
        deviation = math.sqrt((s_0)**2 + (s_1)**2 + (s_2)**2 + (s_3)**2)
        return deviation

    def get_reward(self, rotor_speeds, state):
        """Uses current pose of sim to return reward."""
        # reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()

        # distance_delta = self.distance_to_target(self.sim.pose, self.target_pos)
        # angular_v_delta = self.distance_to_target(self.sim.angular_v, [0,0,0])
        # speed_delta = self.avg_rotor_speed(rotor_speeds)

        ## runtime
        # if self.runtime - self.sim.time == 0:
        #     bonus = .5
        # else:
        #     bonus = 0
        # reward = 1.0 - (0.5*dist_rew) #+ (-0.2 * angular_v_delta) +  (-0.005 * speed_delta) # + bonus

        # dist_rew = distance_delta
        if abs(state[2] - self.target_pos[2]) <= 2:
            reward = 5
        else:
            reward = 1 / (abs(state[2] - self.target_pos[2]) / 10)
        ## penalise for crashing
        if self.sim.pose[2] == 0:
            reward = -10

        return reward

    def step(self, rotor_speeds, state):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(rotor_speeds, state)
            pose_all.append(self.sim.pose)
        # new position
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #27
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None,
                 task_name='vanilla'):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_size = 4

        self.action_low = 300
        self.action_high = 1000

        self.runtime = runtime

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

        self.task_name = task_name
        self.ep_reward = 0
        self.pos_reach_treshold = 0.5

    def get_reward(self, pose_rep=None):
        # dispatch to get_reward_<task_name>, e.g. get_reward_vanilla or get_reward_takeoff
        return getattr(self, "get_reward_" + self.task_name)(pose_rep)

    def get_reward_vanilla(self, pose_rep):
        """Uses current pose of sim to return reward."""
        reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def get_reward_takeoff(self, pose_rep):

        #weights for x,y,z dims ---> z more important for takeoff
        xyz_w = [.3, .3, .4]

        time_reward = 1
        dir_reward = np.dot(xyz_w, self.get_direction_reward(pose_rep))
        pos_reward = np.dot(xyz_w, self.get_pos_reward(pose_rep))

        #weights of direction / position / time
        step_weights = [.15, .25, .6]
        step_reward = np.tanh(
            np.dot(step_weights, [dir_reward, pos_reward, time_reward]))

        terminal_reward = 0

        if self.sim.done and self.sim.time < self.runtime:
            terminal_reward = -10  #crash
        elif self.get_task_success(pose_rep):
            terminal_reward = 10  #target position reached

        #print([appr_reward, pos_reward, time_reward, crash_penalty])
        #print("reward:{} z_approx:{} z_posdiff:{} crash_penalty:{}".format(reward, z_approx, -z_posdiff, crash_penalty))
        return step_reward + terminal_reward

    def get_direction_reward(self, pose_rep):

        pos_log = np.array(pose_rep)[:, :3]
        diff_init = np.abs(np.array(pos_log - self.target_pos))
        #print("\n",diff_init)
        mrate = np.diff(diff_init, axis=0)
        #print("\n",speeds)
        dir_reward = -np.average(mrate, axis=0, weights=[.3, .7])
        #print("\n",appr_reward)
        return dir_reward

    def get_pos_reward(self, pose_rep):

        pos_diff = self.get_pos_diff(pose_rep[self.action_repeat - 1])
        pos_reward = np.array(
            [-pd if pd > self.pos_reach_treshold else 10 for pd in pos_diff])
        return pos_reward

    def get_pos_diff(self, pose):
        return np.abs(pose[:3] - self.target_pos)

    def get_task_success(self, pose_rep):
        pos_diff = self.get_pos_diff(pose_rep[self.action_repeat - 1])
        pos_r = np.array(
            [0 if pd > self.pos_reach_treshold else 1 for pd in pos_diff])
        return np.sum(pos_r) == 3

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            #reward += self.get_reward()
            pose_all.append(self.sim.pose)

        reward = self.get_reward(pose_all)
        self.ep_reward += reward

        #done if target position is reached
        done = done or self.get_task_success(pose_all)

        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.ep_reward = 0
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
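A brief usage sketch of the task_name dispatch (hedged illustration; PhysicsSim and numpy are assumed to be available as in the snippets above, and the mid-range rotor speeds are arbitrary):

import numpy as np

# 'takeoff' makes get_reward resolve to get_reward_takeoff via getattr
task = Task(init_pose=np.array([0., 0., 1., 0., 0., 0.]),
            target_pos=np.array([0., 0., 10.]),
            task_name='takeoff')
state = task.reset()
rotor_speeds = np.full(task.action_size, 450.0)  # within this task's [300, 1000] action range
next_state, reward, done = task.step(rotor_speeds)
print(reward, done)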
Beispiel #28
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    # --------------------------------------------------------------------------
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])
        self.init_pose = init_pose

        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        self.count = 1

    # --------------------------------------------------------------------------
    def get_reward(self):
        """Uses current pose of sim to return reward."""
        differences = 0
        checks = list(range(0, 4))
        combos = itertools.combinations(checks, 2)
        for combo in combos:
            differences -= abs(self.sim.prop_wind_speed[combo[0]] -
                               self.sim.prop_wind_speed[combo[1]])

        reward = (1 - .3 * abs(self.target_pos - self.sim.pose[:3]).sum()
                  - .2 * np.tanh(abs(self.sim.pose[4])))  # - .1*np.tanh(differences)
        #print(differences)
        #if done and self.sim.pose[2] <= self.init_pose[2]: reward += -1000

        #reward = 1. - .3*(abs(self.sim.pose[:3] - self.target_pos)).sum() - 6*(1/self.count)
        #reward = 1. - .3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    # --------------------------------------------------------------------------
    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        self.count += 1
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    # --------------------------------------------------------------------------
    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.count = 1
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #29
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 0
        penalty = 0
        current_position = self.sim.pose[:3]
        # penalty for euler angles, we want the takeoff to be stable
        penalty += abs(self.sim.pose[3:6]).sum()
        # penalty for distance from target
        penalty += abs(current_position[0] - self.target_pos[0])**2
        penalty += abs(current_position[1] - self.target_pos[1])**2
        penalty += 10 * abs(current_position[2] - self.target_pos[2])**2

        # link velocity to residual distance
        penalty += abs(
            abs(current_position - self.target_pos).sum() -
            abs(self.sim.v).sum())

        distance = np.sqrt((current_position[0] - self.target_pos[0])**2 +
                           (current_position[1] - self.target_pos[1])**2 +
                           (current_position[2] - self.target_pos[2])**2)
        # extra reward for flying near the target
        if distance < 10:
            reward += 1000
        # constant reward for flying
        reward += 100
        return reward - penalty * 0.0002

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #30
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * self.get_sim_state().shape[0]
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.start_state = self.get_sim_state()
        # self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        def bound_val(val, coef, min_val, max_val):
            return coef * np.clip(val, min_val, max_val)

        #reward_goal =  1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum() # goal position

        reward_no_roll = -np.power(self.sim.pose[3], 2)  # no roll
        reward_no_pitch = -np.power(self.sim.pose[4], 2)  # no pitch

        reward_no_xy_v = -np.power(self.sim.v[:2], 2).sum() / 30  # no xy velocity
        reward_z_v = self.sim.v[2]

        reward = bound_val(reward_no_xy_v, 1, -1, 0) \
            + bound_val(reward_z_v, 1, 0, 10) \
            + bound_val(reward_no_roll, 5, -1, 0) \
            + bound_val(reward_no_pitch, 5, -1, 0)

        return reward

    def get_sim_state(self):
        return np.hstack([self.sim.pose, self.sim.v, self.sim.angular_v])

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.get_sim_state())
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.start_state = self.get_sim_state()
        state = np.concatenate([self.get_sim_state()] * self.action_repeat)
        return state
class Takeoff():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None,position_noise=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        print("########2 Task to Takeoff")
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.runtime = runtime
        self.position_noise = position_noise
        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        reward = 0.0

        # encourage acceleration in the +z-direction
        # reward += self.sim.linear_accel[2]
        # reward += 0.5 * self.sim.linear_accel[2] * self.sim.dt**2

        # encourage velocity in the +z-direction
        if self.sim.v[2] > 0:
            reward = 10 * self.sim.v[2]
        else:
            reward = -10

        # encourage smaller position errors and keep the denominator from getting too close to zero
        # posDiff = -abs(self.sim.pose[2] - self.target_pos[2])  # **2
        # if posDiff > 0.1:
        #     reward += 10/posDiff
        # else:
        #     reward += 100

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)

            # sparse reward at the end if the target is reached
            if self.sim.pose[2] >= self.target_pos[2]:
                print("\nSuccessful takeoff to target!")
                reward += 10
                done = True

        #if np.isclose(self.target_pos[2], self.sim.pose[2], 1):
        #    reward += 100
        #    done = True



        #if(self.sim.pose[2] >= self.target_pos[2] and self.sim.time >= self.runtime):
        #if np.allclose(self.sim.pose[:-3],self.target_pos, rtol=1):
        #    reward +=100
        #    done = True

        next_state = np.concatenate(pose_all)
        return next_state, reward, done


    def reset(self):
        """Reset the sim with noise to start a new episode."""
        self.sim.reset()

        # ::os:: add a random start position, as suggested by some papers, to make the agent generalize better
        # (only position noise is supported; there is no angular noise option)
        if self.position_noise is not None:
            rand_pose = np.copy(self.sim.init_pose)
            if self.position_noise > 0:
                rand_pose[:3] += np.random.normal(0.0, self.position_noise, 3)
                # print("start rand_pose =", rand_pose)

            self.sim.pose = np.copy(rand_pose)

        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
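A short usage sketch of the noisy reset (hedged illustration; PhysicsSim and numpy are assumed to be available as in the snippets above, and the noise level is arbitrary):

import numpy as np

task = Takeoff(init_pose=np.array([0., 0., 1., 0., 0., 0.]),
               target_pos=np.array([0., 0., 10.]),
               position_noise=0.25)
state = task.reset()   # the starting (x, y, z) is jittered with N(0, 0.25) noise
print(state.shape)     # (18,) -> action_repeat (3) * pose length (6)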
Beispiel #32
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.is_done = 0
        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # penalize crash

        factor = 1
        if abs(self.sim.pose[2] - self.target_pos[2]) == 0:
            factor = 100000000
        elif self.is_done and self.sim.time < self.sim.runtime:
            factor = -100
        else:
            factor = (1 / abs(self.sim.pose[2] - self.target_pos[2]))
        reward = factor + (100. - .3 *
                           (abs(self.sim.pose[:3] - self.target_pos)).sum())
        #print("\rposition= ", self.sim.pose[:3], ", reward = ", reward)

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            self.is_done = done
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done, self.sim.pose

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state