Example #1
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None, action_repeat=1):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = action_repeat

        self.state_size = self.action_repeat * 6
        self.action_low = 1
        self.action_high = 900
        self.action_size = self.action_repeat * 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 10.
        if self.sim.pose[2] < (self.target_pos[2] - 3.): 
            reward += 2*(self.sim.pose[2] - self.sim.init_pose[2])
            reward -= (abs(self.sim.pose[:2] - self.target_pos[:2]) > 3.).sum()
        else: 
            reward += 10.
            reward -= 2*(abs(self.sim.pose[:3] - self.target_pos) > 3.).sum()
            reward -= np.int32(abs(self.sim.pose[2] - self.target_pos[2]) > 3.)
        
        if self.sim.done:
            if self.sim.time < self.sim.runtime: 
                reward -= 10
                
        reward = reward / 10
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for i in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds[i*4:(i+1)*4]) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, (reward / self.action_repeat), done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
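
A minimal usage sketch for the Task above, assuming the class is importable together with numpy as np and the project's PhysicsSim (the simulator's import path is an assumption):

import numpy as np

# Hypothetical driver loop: hold every rotor at a constant speed and step until the episode ends.
task = Task(target_pos=np.array([0., 0., 10.]), action_repeat=3)
state = task.reset()                                 # shape (action_repeat * 6,) = (18,)
done = False
while not done:
    rotor_speeds = np.full(task.action_size, 450.)   # one speed per rotor, per action repeat
    state, reward, done = task.step(rotor_speeds)
print(state.shape, reward)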
Example #2
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0 #0
        self.action_high = 900  #900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 
Example #3
class Task():
    """
    Task (environment) that defines the goal and provides feedback to the agent.

    The task to consider here is Take-off.
    The quadcopter starts slightly above (x, y, z) = (0, 0, 0).
    The target is set to (x, y, z) = (0, 0, 10.0).

    """
    def __init__(self, init_pose=None, init_velocities=None,
        init_angle_velocities=None, runtime=5., target_pos=None):
        """

        Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent

        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6 # size of the state space (with action repeats)
        self.action_size = 4 # size of the action space
        self.action_low = 0.0 # lower bound for the action space
        self.action_high = 900.0 # upper bound for the action space

        # the target position
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])
        # the target angle
        self.target_angle = np.array([0., 0., 0.])

    def get_reward_ddpg(self):
        """
        Uses current pose of sim to return reward and done
        (so that once the quadcopter reaches the height of the target,
        the episode ends. )

        """

        ## the following reward was the best one as of March 26
        reward = - 0.20*(abs(self.sim.pose[:2] - self.target_pos[:2])).sum()
        reward += 2.4 - 1.2*abs(self.sim.pose[2] - self.target_pos[2])
        # reward += 2.4 - 1.2*abs(self.sim.pose[2] - self.target_pos[2])

        # since the angles in self.sim.pose[3:] take value in [0, 2*pi]
        # I will rewrite them such that they take value in range [-pi, pi]
        # and then introduce the reward
        angles_around_0 = np.array([0.0, 0.0, 0.0])
        for i in range(3):
            if self.sim.pose[i+3] <= np.pi:
                angles_around_0[i] = self.sim.pose[i+3]
            else:
                angles_around_0[i] = self.sim.pose[i+3] - 2.0*np.pi
        reward += - 0.20*(abs(angles_around_0[0:3]-self.target_angle)).sum()

        done = False
        if(self.sim.pose[2] >= self.target_pos[2]):
            reward += 200.0
            done = True

        return reward, done

    def step_ddpg(self, rotor_speeds):
        """
        Uses action to obtain next state, reward, done.
        For DDPG algorithm.

        """
        reward = 0.0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            delta_reward, done_height = self.get_reward_ddpg()
            reward += delta_reward
            # once the quadcopter reaches the height of the target, end the episode
            if done_height:
                done = done_height
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)

        return next_state, reward, done

    def reset(self):
        """
        Reset the sim to start a new episode.

        """
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state

    def get_reward(self):
        """
        THIS ONE IS USED FOR THE DEMONSTRATION PART OF THE NOTEBOOK.
        Uses current pose of sim to return the reward.

        """
        reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def step(self, rotor_speeds):
        """
        THIS ONE IS USED FOR THE DEMONSTRATION PART OF THE NOTEBOOK.
        Uses action to obtain next state, reward, done.

        """
        reward = 0.0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)

        return next_state, reward, done
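
The per-angle loop in get_reward_ddpg above maps Euler angles from [0, 2*pi) into (-pi, pi]; a small vectorized sketch of the same mapping, with illustrative values:

import numpy as np

# Angles above pi are shifted down by 2*pi, exactly as in the loop above.
angles = np.array([0.1, 3.5, 6.0])                     # sample Euler angles in radians
angles_around_0 = np.where(angles <= np.pi, angles, angles - 2.0 * np.pi)
print(angles_around_0)                                 # [ 0.1       -2.78318531 -0.28318531]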
Example #4
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.init_pose = init_pose
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self, done):
        """Uses current pose of sim to return reward."""
        #pos = np.array(self.sim.pose[:3])
        #target = np.array(self.target_pos[:3])
        #reward = np.tanh(2 - 4e-3 * np.sum((self.target_pos - self.sim.pose[2])**2)  - 3e-4 * np.sum(self.sim.v[2]))    
        #reward += self.sim.v[2]*10
        #done = False
        #if distance < 0.5: 
         #   done = True
        #reward = np.tanh(self.sim.pose[2] * 10 +self.sim.v[2] *10)
        #reward = np.tanh(1 + self.sim.v[2]  + self.sim.pose[2] )
        '''- np.sqrt(np.square(self.sim.v[:2]).sum())'''
        #reward = np.tanh(1 - 0.003 * (abs(self.target_pos - self.sim.pose[:3]))).sum()
        #np.tanh(1 - 0.003*(abs(self.sim.pose[:3] - self.target_pos))).sum()
        
        #dist_from_target = np.sqrt(np.square(self.sim.pose[:3] - self.target_pos).sum())
        # Want the quadcopter to hover straight up, measure angular deviation
        #angular_dev = np.sqrt(np.square(self.sim.pose[3:]).sum())
        # Also want to penalise movement not in z-axis (DeepMind Locomotion)
        #non_z_v = np.square(self.sim.v[:2]).sum()
        #reward = 1. - .001*dist_from_target - .0005*angular_dev - .0005*non_z_v
        #reward = np.tanh(1.-.2*(abs(self.sim.pose[:3] - self.target_pos)).sum())
        reward = 0
        #if self.sim.pose[2] >= self.target_pos[2]:
        #    reward += 10
        #reward += np.tanh(np.linalg.norm(self.sim.pose[:3] -self.target_pos) + self.sim.v[2])
        # initial reward - fraction of the z-velocity
        reward = 0.5 * self.sim.v[2]
    
        # additional reward if the agent is close in vertical coordinates
        # to the target pose
        if abs(self.sim.pose[2] - self.target_pos[2]) < 3:
            reward += 15.
            done = True
        
        # penalize a crash
        if done and self.sim.time < self.sim.runtime: 
            reward = -1
            return reward, done
    
        # penalize the downward movement relative to the starting position
        if self.sim.pose[2] < self.init_pose[2]:
            reward -= 1

        return np.tanh((1 - .001 * np.linalg.norm(self.sim.pose[:3] - self.target_pos)) + reward + 0.1 * self.sim.v[2]), done
        
    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward, done = self.get_reward(done)
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done
    
    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
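
For intuition, a rough numeric evaluation of the combined reward returned above, using hypothetical pose and velocity values and ignoring the proximity bonus and the crash/descent penalties:

import numpy as np

pose_xyz = np.array([0.5, -0.5, 7.0])                # hypothetical (x, y, z)
target   = np.array([0., 0., 10.])
v_z      = 2.0                                       # hypothetical vertical velocity
base     = 0.5 * v_z                                 # the z-velocity term
distance = np.linalg.norm(pose_xyz - target)         # ~3.08
reward   = np.tanh((1 - .001 * distance) + base + 0.1 * v_z)
print(round(float(reward), 3))                       # ~0.976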
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1.-.001*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        
        if (abs(self.sim.pose[0] - self.target_pos[0])) < 0.25:
            reward += 0.03
        if (abs(self.sim.pose[1] - self.target_pos[1])) < 0.25:
            reward += 0.03
        if (abs(self.sim.pose[2] - self.target_pos[2])) < 0.25:
            reward += 0.03
        
        if self.sim.time < self.sim.runtime and self.sim.done == True:
            reward -= 10

        #reward = 1
        #penalty = 0
        #current_position = self.sim.pose[:3]
        #distance = np.sqrt(((current_position - self.target_pos)**2).sum())
        
        # penalty for euler angles to avoid unecessary rotations
        #penalty += abs(self.sim.pose[3:6]).sum()
        
        #penalty for being distant from the target coordinates
        #penalty += .003*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        
        #penalty for increasing total distance to the target
        #penalty += distance**2
        
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
class TaskModified():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    """O novo objetivo é aterrissar o quadricóptero. Foi substituído a variável de posição
        de objetivo de 'target_pos' para target_pos_x_y' com apenas as posições de 'x' e 'y', sendo que a posição 'z' será sempre zero,
        representando a altura no ponto de pouso."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos_x_y=None):  # target_pos=None
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos_x_y: target/goal (x,y) position for the agent ('z' is fixed at 0. for landing)
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        # self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])
        """A altitude 'z' e os ângulos de Euler sempre serão 0. para o objetivo de aterrissagem, sendo necessário
            passar apenas os valores das coordenadas 'x' e 'y'."""
        self.target_pos = np.append(
            target_pos_x_y, 0.) if target_pos_x_y is not None else np.array(
                [0., 0., 0.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        """É importante pontuar a recompensa não só pela posição alvo, mas também pela estabilidade na aterrissagem.
            Por isso, os ângulos de Euler também entraram no cálculo dessa pontuação, sendo dividido por igual o peso
            da pontuação que era de 0.3"""
        # reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum(
        ) - .2 * (abs(self.sim.pose[3:] - [0., 0., 0.])).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        # rotor_speeds_suavised = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            """Repassa as velocidades de forma suavisada ao aproximar do alvo.
            for speed in rotor_speeds:
                rotor_speeds_suavised.append((speed/(self.init_height*1.25))+.2)
            done = self.sim.next_timestep(np.concatenate(rotor_speeds_suavised))"""
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
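
A quick numeric check of the landing reward above, with illustrative values, showing how the 0.3 position weight and the 0.2 Euler-angle weight combine:

import numpy as np

pose = np.array([1., 2., 0.5, 0.1, 0.0, 0.2])        # hypothetical x, y, z, phi, theta, psi
target_pos = np.array([0., 0., 0.])
reward = 1. - .3 * (abs(pose[:3] - target_pos)).sum() - .2 * (abs(pose[3:] - [0., 0., 0.])).sum()
print(round(reward, 2))                              # 1 - 0.3*3.5 - 0.2*0.3 = -0.11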
    def __init__(self,
                 init_pos=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=3.,
                 target_pos=None,
                 action_repeat=3,
                 state_size=6,
                 action_low=0,
                 action_high=900,
                 goal="takeoff",
                 min_accuracy=1.5,
                 distance_factor=2.3,
                 angle_factor=6.3,
                 v_factor=4.5,
                 rotor_factor=0.1,
                 time_factor=0.1,
                 elevation_factor=2.4,
                 crash_factor=1.,
                 target_factor=1.):
        """Initialize a Task object.
        Params
        ======
            init_pos (array): initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities (array): initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities (array): initial radians/second for each of the three Euler angles
            runtime  (float): time limit for each episode
            target_pos (array): target/goal (x,y,z) position for the agent
            action_repeat (int): Number of timesteps to step agent
            state_size (int): Dimension of each state
            action_low (array): Min value of each action dimension
            action_high (array): Max value of each action dimension
            goal (string): Specifies what type of task the agent is attempting and sets standard initial values if not otherwise specified
                Options: 'hover', 'target', 'land', 'takeoff' (default)
            min_accuracy (float): Sets how far the average deviation from the target point is acceptable
            distance_factor (float): Scalar that controls the relative reward for the distance measurement of the reward function  # DEPRECATED
            angle_factor (float): Scalar that controls the relative reward for the angles off axis of the reward function  # DEPRECATED
            v_factor (float): Scalar that controls the relative reward for the angular velocity of the reward function  # DEPRECATED
            rotor_factor (float): Scalar that controls the relative reward for the relative rotor speed of the reward function  # DEPRECATED
            time_factor (float): Scalar that controls the relative reward for the time bonus of the reward function  # DEPRECATED
            elevation_factor (float): Scalar that controls the relative reward for the elevation measurement of the reward function  # DEPRECATED
            crash_factor (float): Scalar that controls the relative reward for the crash detector of the reward function  # DEPRECATED
            target_factor (float): Scalar that controls the relative reward for the goal conditions of the reward function  # DEPRECATED

        """
        ## Set target and initial conditions based on the goal
        ## But doesn't override if other conditions were specified
        if goal is "target":
            self.target_pos = np.array(
                [10., 0., 10.]) if target_pos is None else target_pos

        elif goal is "hover":
            self.target_pos = np.array(
                [0., 0., 10.]) if target_pos is None else target_pos

        elif goal is "land":
            self.target_pos = np.array(
                [0., 0., 0.01]) if target_pos is None else target_pos

        elif goal is "takeoff":
            self.target_pos = np.array(
                [0., 0., 10.]) if target_pos is None else target_pos
            self.init_pos = np.array([0.0, 0.0, 0.1, 0.0, 0.0, 0.0
                                      ]) if init_pos is None else init_pos

        else:
            print(
                "It seems an improper goal was specified. Please input one of the following options:\n"
                "* 'hover'\n* 'target'\n* 'land'\n* 'takeoff'")

        if goal is not "takeoff":
            self.init_pos = np.array([0.0, 0.0, 10, 0.0, 0.0, 0.0
                                      ]) if init_pos is None else init_pos

        self.goal = goal

        ## Simulation
        self.sim = PhysicsSim(self.init_pos, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = action_repeat

        self.state_size = self.action_repeat * state_size
        self.action_low = action_low
        self.action_high = action_high
        self.action_size = 4  # Dimension of each action (one for each rotor)

        self.min_accuracy = min_accuracy

        self.distance_factor = distance_factor  # DEPRECATED
        self.angle_factor = angle_factor  # DEPRECATED
        self.v_factor = v_factor  # DEPRECATED
        self.rotor_factor = rotor_factor  # DEPRECATED
        self.time_factor = time_factor  # DEPRECATED
        self.elevation_factor = elevation_factor  # DEPRECATED
        self.crash_factor = crash_factor  # DEPRECATED
        self.target_factor = target_factor  # DEPRECATED

        ## Numerical representation of the distance to the target
        self.target_proximity = (abs(self.sim.pose[:3] -
                                     self.target_pos)).sum()
        self.total_proximity = []
        self.average_proximity = 0.
        self.best_proximity = np.inf
        self.previous_proximity = self.target_proximity

        ## DEPRECATED
        self.params = {
            "Distance": self.distance_factor,
            "Angle": self.angle_factor,
            "Angular V": self.v_factor,
            "Rotor": self.rotor_factor,
            "Time": self.time_factor,
            "Elevation": self.elevation_factor,
            "Crash": self.crash_factor,
            "Target": self.target_factor
        }
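
A side note on the goal dispatch above: the checks compare strings with ==, which tests value equality; `is` tests object identity and can silently fail for strings built at runtime, as a quick self-contained sketch shows:

# Equality vs. identity for the goal strings (illustrative only).
goal = "".join(["take", "off"])   # builds a new string object at runtime
print(goal == "takeoff")          # True  -> value comparison, always safe
print(goal is "takeoff")          # typically False (and a SyntaxWarning on Python 3.8+)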
Example #8
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0.0
        self.action_high = 900.0
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 150.])

    def get_reward(self, rotor_speeds, init_pose):
        """Uses current pose of sim to return reward."""

        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()

        #REWARD FOR EVERY TIMESTEP
        reward_plus = 0.050  #0.05

        #REWARD / PUNISHMENT FOR TRAVELLING IN THE RIGHT DIRECTION
        if self.sim.pose[2] < init_pose[2]:
            reward_pose = -1 * 0.1
        else:
            reward_pose = (0.01 / 150) * (self.sim.pose[2] - init_pose[2]
                                          )  #0.05

        #PUNISHMENTS FOR ACTIONS THAT MAKE THE DRONE UNSTABLE
        #reward_pose = -1* (0.05000/10) * sum(abs(self.target_pos - self.sim.pose[:3]))
        #reward_pose = -1* (0.01200/10) * (abs(self.target_pos[2] - self.sim.pose[2])
        reward_angularv = -1 * (0.0100 / 30) * (max(abs(self.sim.angular_v)))
        reward_pose2 = -1 * (0.0100 / 6) * sum(abs(self.sim.pose[3:]))
        reward_rs = -1 * (0.0100 / 900) * (max(rotor_speeds) -
                                           min(rotor_speeds))
        reward_pose3 = -1 * (0.01000 / 10) * sum(
            abs(self.target_pos[:2] - self.sim.pose[:2]))

        reward = reward_plus + reward_pose + reward_angularv + reward_pose2 + reward_rs + reward_pose3

        #reward = np.clip(reward,-1.0,1.0)

        #return reward
        rewards = [
            reward, reward_pose, reward_angularv, reward_pose2, reward_rs
        ]
        return rewards

    def step(self, rotor_speeds, init_pose):
        """Uses action to obtain next state, reward, done."""

        rewards = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            rewards += self.get_reward(rotor_speeds, init_pose)
            #reward += self.get_reward(rotor_speeds)
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, rewards, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #9
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 1

        self.runtime = runtime

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [10., 10., 10.])

    def get_reward(self, done):
        """Uses current pose of sim to return reward."""
        x, y, z = self.sim.pose[0:3]
        x_a, y_a, z_a = self.sim.pose[3:6]
        xdot, ydot, zdot = self.sim.v[0:3]
        xdot_a, ydot_a, zdot_a = self.sim.angular_v[0:3]
        time = self.sim.time
        target_z = self.sim.pose[2]

        reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos[:3])).sum()

        #        reward = 1.

        #        reward = .1-.03*abs(self.sim.pose[:3]-#self.target_pos[:3]).sum()-.01*abs(ydot_a)-.01*abs(zdot_a)

        #        if time > self.runtime:
        #            reward += 1.

        #        if z > target_z and z <(target_z + 10):
        #            reward += 10.

        #        if done and time < self.runtime:
        #            reward += - 1 / time

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(done)

            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #10
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):

        #pCenterXYZ = 0

        #pStability = -(abs(self.sim.pose[3:6]).sum()**2)
        #pCenterXYZ += -(abs(current_position[0]-self.target_pos[0])**2)
        #pCenterXYZ += -(abs(current_position[1]-self.target_pos[1])**2)
        #pCenterXYZ += -(abs(current_position[2]-self.target_pos[2])**2)

        #sum_acceleration = -np.linalg.norm(self.sim.linear_accel)

        #distance = np.sqrt((current_position[0]-self.target_pos[0])**2 + (current_position[1]-self.target_pos[1])**2 + (current_position[2]-self.target_pos[2])**2)

        reward = 1 - 0.003 * (abs(self.sim.pose[:3] - self.target_pos)).sum()

        #reward = -abs(self.target_pos[2] - self.sim.pose[2])

        def is_equal(x, y, delta=0.0):
            return abs(x - y) <= delta

        if is_equal(self.target_pos[2], self.sim.pose[2], delta=1):
            reward += 10.0

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []

        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
            if done:
                reward += 10.0

        next_state = np.concatenate(pose_all)

        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #11
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self, done):
        """Uses current pose of sim to return reward."""
        reward = 0
        if done and self.sim.time < self.sim.runtime:
            reward += -1
        if self.sim.v[0] != 0:
            reward -= 10

        if self.sim.v[1] != 0:
            reward -= 10

        if self.sim.v[2] > 0:
            reward += 50
        euler_angles = self.sim.pose[3:6].sum()
        reward -= euler_angles
        lateral_distance = ((self.sim.pose[0] - self.target_pos[0]) +
                            (self.sim.pose[1] - self.target_pos[1]))**2
        reward -= lateral_distance
        vertical_distance = (self.sim.pose[2] - self.target_pos[2])**2
        if vertical_distance != 0:
            reward += 100 / vertical_distance
        #reward += 1.-.003*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        reward = np.tanh(reward)
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(done)
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #12
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None,
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 22
        self.action_low = 325 #400-500 flies, 400 doesn't
        self.action_high = 425
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self, rotor_speeds):
#         """Uses current pose of sim to return reward."""
        pose_error = abs(self.target_pos - self.sim.pose[:3])

        distance = np.linalg.norm(self.target_pos - self.sim.pose[:3])
        avg_angle = np.mean(self.sim.pose[3:])
        linear_accel = np.linalg.norm(self.sim.linear_accel)
        angular_accels = np.linalg.norm(self.sim.angular_accels)

        reward = 0
#         reward -= 10 * pose_error[2]
#         reward -= 10 * pose_error[1]
#         reward -= 10 * pose_error[0]
#         reward -= 0.1 * linear_accel.sum()
#         reward -= 0.1 * angular_accels.sum()
        # reward -= 1.0 * np.std(rotor_speeds)
        # reward -= 1.0 * avg_angle
#         print('rotor speed {}'.format(np.std(rotor_speeds)))

        reward += 1000
        reward -= 0.05 * (distance ** 2)

        return reward / 1000.0

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        # make sure it's an actual array
        rotor_speeds = np.array(rotor_speeds)
        #rescale from -1 to +1 to full range
        rotor_speeds = (self.action_high - self.action_low)/2 * (rotor_speeds + 1) + self.action_low
        rotor_speeds = np.ones_like(rotor_speeds) * rotor_speeds[0]
        # print('\r rotor speeds {}'.format(rotor_speeds), end='')
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward(rotor_speeds)
            pose_all.append(np.concatenate([self.sim.pose, self.sim.v, self.sim.angular_v, self.sim.linear_accel, self.sim.angular_accels, self.sim.prop_wind_speed]))
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose, self.sim.v, self.sim.angular_v, self.sim.linear_accel, self.sim.angular_accels, self.sim.prop_wind_speed] * self.action_repeat)
        return state

    def dump(self):
        print("time {:4.1f} x {:=+04.1f} y {:=+04.1f} z {:=+04.1f}".format(self.sim.time, self.sim.pose[0], self.sim.pose[1], self.sim.pose[2]))
Example #13
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent // changed to target only z
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3
        
        #default state_size
        #self.state_size = self.action_repeat * len(self.sim.pose)
        
        #simplified state
        self.state_size = self.action_repeat
        
        self.action_low = 0    #default 0
        self.action_high = 900   #default 900
        self.action_size = 1     #default 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #default value
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
               
        #my reward
        reward = 1 - 0.01 * abs(self.sim.pose[2] - self.target_pos[2]) - 0.01*abs(self.sim.v[2])
               
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0.0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward()
            #default pose_all
            #pose_all.append(self.sim.pose)
            
            #simple state
            pose_all.append(self.sim.pose[2])
        
        #default next_state
        #next_state = np.concatenate(pose_all)
        
        #simple next_state
        next_state = np.array(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        #default state, uses x,y,z,phi,theta,psi repeated n times
        #state = np.concatenate([self.sim.pose] * self.action_repeat) 
        
        #simple state
        state = [self.sim.pose[2]] * self.action_repeat
        return state
class Takeoff():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None,position_noise=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        print("########2 Task to Takeoff")
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.runtime = runtime
        self.position_noise = position_noise
        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        reward = 0.0

        #encourage acceleration in the +z-direction
        #reward += self.sim.linear_accel[2]
        #reward +=  0.5 * self.sim.linear_accel[2] * self.sim.dt**2

        #encourage velocity in the +z-direction
        if self.sim.v[2] > 0:
            reward = 10*self.sim.v[2]
        else:
            reward = -10

        #encourage smaller position errors and keep the denominator from getting too close to zero
        #posDiff = -abs(self.sim.pose[ 2] - self.target_pos[ 2]) #**2 
        #if posDiff > 0.1: 
        #    reward += 10/posDiff 
        #else :
        #    reward += 100


        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)

            #sparse reward at the end if target is reached 
            if(self.sim.pose[2] >= self.target_pos[2]):
                print("\n sucessfull takeoff to target target  !!!!!!!!!")
                reward += 10
                done = True

        #if np.isclose(self.target_pos[2], self.sim.pose[2], 1):
        #    reward += 100
        #    done = True



        #if(self.sim.pose[2] >= self.target_pos[2] and self.sim.time >= self.runtime):
        #if np.allclose(self.sim.pose[:-3],self.target_pos, rtol=1):
        #    reward +=100
        #    done = True

        next_state = np.concatenate(pose_all)
        return next_state, reward, done


    def reset(self):
        """Reset the sim with noise to start a new episode."""
        self.sim.reset()

        #::os:: add a random start position, as suggested by some papers, to make the agent generalize better
        if self.position_noise is not None:
            rand_pose = np.copy(self.sim.init_pose)
            if self.position_noise is not None and self.position_noise > 0:
                rand_pose[:3] += np.random.normal(0.0, self.position_noise, 3)
                #print("start rand_pose =",rand_pose) 

            self.sim.pose = np.copy(rand_pose)

        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #15
class TaskTakeOff():
    '''Task(environment) that defines the goal and provides feedback to the agent'''

    def __init__(self, init_pose=None, init_velocities=None,\
                 init_angle_velocities=None, runtime=5., target_pos=None):
        '''initialize a task object
        Params
        ======
        init_pose               : initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
        init_velocities         : initial velocities of the quadcopter in (x,y,z) dimensions
        init_angle_velocities   : initial radians/second for each of the three Euler angles
        runtime                 : time limit for each episode
        target_pos              : target/goal (x,y,z) position for the agent
        
        Footnote
        ========
        adjust action_low and action_high to shrink the continuous action space
        
        '''

        # simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 410
        self.action_high = 420
        self.action_size = 4

        # goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        '''use current position of simulation to return reward
        
        Footnote:
        ========
        1. include vertical velocity as part of the reward, encouraging the copter to fly vertically
        2. penalize crash
        3. reward clipping to range (-1,1)
        4. or use np.tanh() to squeeze reward in range (-1,1)

        '''
        # version 01 - udacity task template
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum() + 0.3*(self.sim.v[2])
        #reward = np.clip(reward, -1,1)

        # version 02 - udacity takeoff task template
        # reward = zero for matching target z, -ve as you go farther, upto -20
        #reward = -min(abs(self.target_pos[2] - self.sim.pose[2]), 20.)

        #if self.sim.pose[2] > self.target_pos[2]:
        #    reward += 1. # bonus reward
        #elif self.sim.done and self.sim.time < self.sim.runtime:
        #    reward -= 1. # crash before timeout

        # version 03 - use np.tanh()
        reward = (3.0 * self.sim.v[2] - 0.025 * abs(self.sim.v[:2]).sum()
                  + 1.5 * self.sim.pose[2] - 0.25 * abs(self.sim.pose[:2]).sum())
        reward = np.tanh(reward)

        return reward

    def step(self, rotor_speeds):
        '''use action to obtain next state, reward, done
        
        Footnote
        ========
        self.sim.next_timestep()    : take in action inputs and update the sim.pose to new position,
                                      accumulate the system run-time, return flag of done status
        
        '''

        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the simulation position and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(
            pose_all
        )  # total state_size: action_repeat * simulation's position
        return next_state, reward, done

    def reset(self):
        '''reset the simulation to start a new episode'''
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
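
The footnote in get_reward above mentions two ways to keep the reward in a bounded range; a minimal sketch of both:

import numpy as np

raw_reward = 7.3
print(np.clip(raw_reward, -1, 1))   # 1.0 -> hard clipping to [-1, 1]
print(np.tanh(raw_reward))          # ~0.999999 -> smooth squashing into (-1, 1)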
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None,
                 action_repeat=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = action_repeat if action_repeat is not None else 3

        self.init_pos = init_pose
        self.state_size = self.action_repeat * 6
        self.action_low = 10
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])
        self.flight_path = self.target_pos - self.init_pos[:3]
        self.num_steps = 0
        #np.seterr(all='raise')

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1. - .2 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        # reward = 0
        # if not np.all(self.target_pos == self.init_pos[:3]):
        #     a = np.cross(self.target_pos - self.sim.pose[:3], self.flight_path)
        #     reward -= np.linalg.norm(a) / np.linalg.norm(self.flight_path)
        # reward -= np.linalg.norm([self.sim.pose[:3] - self.target_pos])

        # print('task reward : ',reward)
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        self.num_steps += 1
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.num_steps = 0
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task_Hover():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 10
        self.action_high = 100
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 0
        factor = 3
        dis = self.sim.pose[2] - self.target_pos[2]
        if (dis >= 0):  # agent above or equal the target
            reward += dis * factor
        else:  # agent below the target
            reward += (1 / np.abs(dis)) * factor
        # to make it in a range [-1,1]
        reward = np.tanh(reward)
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Example #18
class TaskLanderWithPhy():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 env=None,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 4

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 40

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

        #-------------------------------------------------------------

        #-------------------------------------------------------------

        self.env = env
        self.env.continuous = False

#         self.action_repeat = 3
#         self.state_size = self.action_repeat * np.prod(env.observation_space.shape)

#         self.action_size = np.prod(env.action_space.shape)
#-------------------------------------------------------------
# lunder.demo_heuristic_lander(self.env, render=True)

#     def step(self, action):
#         """Uses action to obtain next state, reward, done."""
#         total_reward = 0
#         reward = 0
#         done = 0
#         obs = []
#         for _ in range(self.action_repeat):
#             ob, reward, done, info = self.env.step(action)
#             done = self.sim.next_timestep(action)

#             total_reward += reward
#             obs.append(ob)

#         next_state = np.concatenate(obs)

#         return next_state, reward, done

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1.3 * (abs(self.sim.v[2]).sum())
        # print('velocity is ' , self.sim.v[2] , 'position',self.sim.pose[:3] ,'target' ,self.target_pos )
        # print ('reward',reward)
        # if self.env.game_over or abs(self.env.state[0]) >= 1.0:
        #     done   = True
        #     reward = -100
        # if not self.env.lander.awake:
        #     done   = True
        #     reward = +100
        # print(self.sim.pose[:3])
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        # total_reward - 0
        pose_all = []
        for _ in range(self.action_repeat):
            # ob, reward, done, info = self.env.step(action)
            # total_reward  += reward
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            # self.sim.pose = ob
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        ob = self.env.reset()  # note: the initial state comes from the wrapped env, not from PhysicsSim
        state = np.concatenate([ob] * self.action_repeat)
        return state
Beispiel #19
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        reward = penalty = 0
        curr_pos = self.sim.pose[:3]

        #distance from current position to target
        distance = np.sqrt((curr_pos[0] - self.target_pos[0])**2 +
                           (curr_pos[1] - self.target_pos[1])**2 +
                           (curr_pos[2] - self.target_pos[2])**2)

        if distance < 5:
            reward += 50

        #reward += 1000
        euler_angles = self.sim.pose[3:6]
        sum_of_euler_angles = abs(euler_angles).sum()

        # add the sum of euler angles metric to penalty
        penalty += sum_of_euler_angles

        if self.sim.v[2] > 0:
            reward += 100

        done = False
        if self.sim.pose[2] >= self.target_pos[
                2]:  # agent has crossed the target height
            # raise TypeError
            reward += 100  # bonus reward (set value as per your experience on the project)
            done = True

        return reward - (penalty * 0.1), done

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim state (pose, velocities)
            delta_reward, done_height = self.get_reward()
            reward += delta_reward
            if done_height:
                done = done_height
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #20
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        # The simulator is initialized as an instance of the PhysicsSim class (from physics_sim.py).
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)

        # Inspired by the methodology in the original DDPG paper, we make use of action repeats. For each timestep of the agent, we step the simulation action_repeat timesteps.
        # If you are not familiar with action repeats, please read the Results section in the DDPG paper.
        self.action_repeat = 3

        # We set the number of elements in the state vector. For the sample task, we only work with the 6-dimensional pose information.
        # To set the size of the state (state_size), we must take action repeats into account.
        self.state_size = self.action_repeat * 6

        #The environment will always have a 4-dimensional action space, with one entry for each rotor (action_size=4).
        # You can set the minimum (action_low) and maximum (action_high) values of each entry here.
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        #TARGET/GOAL: The sample task in this provided file is for the agent to reach a target position. We specify that target position as a variable.
        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    #Then, the reward is computed from get_reward(). The episode is considered done if the time limit has been exceeded,
    # or the quadcopter has travelled outside of the bounds of the simulation.
    def get_reward(self):
        raise NotImplementedError("{} must override get_reward()".format(
            self.__class__.__name__))

    #The step() method is perhaps the most important. It accepts the agent's choice of action rotor_speeds, which is used to prepare the next state to pass on to the agent
    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    #The reset() method resets the simulator. The agent should call this method every time the episode ends.
    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
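For reference, this is how the Task interface above is typically driven by an agent. The loop below is a minimal hypothetical sketch (not part of the original example): it assumes a concrete subclass, here called ConcreteTask, that implements get_reward(), and that numpy is available.

# Usage sketch (hypothetical): run one episode with random rotor speeds.
import numpy as np

task = ConcreteTask(runtime=5., target_pos=np.array([0., 0., 10.]))  # hypothetical subclass implementing get_reward()
state = task.reset()
done = False
episode_return = 0.
while not done:
    # sample one speed per rotor within the allowed action range
    rotor_speeds = np.random.uniform(task.action_low, task.action_high, task.action_size)
    state, reward, done = task.step(rotor_speeds)
    episode_return += reward
print("episode return:", episode_return)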
Beispiel #21
0
class Land():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        # landing task: always start 10 units above the ground, regardless of init_pose
        self.sim = PhysicsSim([0.0, 0.0, 10.0, 0.0, 0.0, 0.0], init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        # Goal: land, i.e. reach ground level (the target_pos argument is not used here)
        if target_pos is None:
            print("Setting default target height (ground level)")
        self.target_pos = 0.0
        
        self.max_duration = 5.0 #sec

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #reward = np.tanh(1 - 0.003*(abs(self.sim.pose[:3] - self.target_pos))).sum()

        height_error = abs(self.target_pos - self.sim.pose[2])
        if height_error < 0.003:
            reward = -height_error / 100.0
        elif height_error > 5:
            reward = -height_error / 5.0
        else:
            reward = -height_error / 10.0

        return reward


    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
            if done:
                if (self.sim.pose[2] - self.target_pos) < 0.5:  # agent ended close to the ground
                    reward += 7.0  # bonus reward
                if (self.sim.pose[2] - self.target_pos) > 3:  # agent ended well above the target height
                    reward -= 1.0  # penalty

                if self.sim.time > self.max_duration:  # agent ran out of time
                    reward -= 10.0  # extra penalty
                if self.sim.time < self.max_duration:  # episode ended within the time limit
                    reward += 3.0  # bonus reward
                    
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pos=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=3.,
                 target_pos=None,
                 action_repeat=3,
                 state_size=6,
                 action_low=0,
                 action_high=900,
                 goal="takeoff",
                 min_accuracy=1.5,
                 distance_factor=2.3,
                 angle_factor=6.3,
                 v_factor=4.5,
                 rotor_factor=0.1,
                 time_factor=0.1,
                 elevation_factor=2.4,
                 crash_factor=1.,
                 target_factor=1.):
        """Initialize a Task object.
        Params
        ======
            init_pos (array): initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities (array): initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities (array): initial radians/second for each of the three Euler angles
            runtime  (float): time limit for each episode
            target_pos (array): target/goal (x,y,z) position for the agent
            action_repeat (int): Number of timesteps to step agent
            state_size (int): Dimension of each state
            action_low (array): Min value of each action dimension
            action_high (array): Max value of each action dimension
            goal (string): Specifies what type of task the agent is attempting and sets standard initial values if not otherwise specified
                Options: 'hover', 'target', 'land', 'takeoff' (default)
            min_accuracy (float): Maximum average deviation from the target point that is considered acceptable
            distance_factor (float): Scalar that controls the relative reward for the distance measurement of the reward function  # DEPRECATED
            angle_factor (float): Scalar that controls the relative reward for the angles off axis of the reward function  # DEPRECATED
            v_factor (float): Scalar that controls the relative reward for the angular velocity of the reward function  # DEPRECATED
            rotor_factor (float): Scalar that controls the relative reward for the relative rotor speed of the reward function  # DEPRECATED
            time_factor (float): Scalar that controls the relative reward for the time bonus of the reward function  # DEPRECATED
            elevation_factor (float): Scalar that controls the relative reward for the elevation measurement of the reward function  # DEPRECATED
            crash_factor (float): Scalar that controls the relative reward for the crash detector of the reward function  # DEPRECATED
            target_factor (float): Scalar that controls the relative reward for the goal conditions of the reward function  # DEPRECATED

        """
        ## Set target and initial conditions based on the goal,
        ## without overriding values that were explicitly specified
        if goal == "target":
            self.target_pos = np.array(
                [10., 0., 10.]) if target_pos is None else target_pos

        elif goal == "hover":
            self.target_pos = np.array(
                [0., 0., 10.]) if target_pos is None else target_pos

        elif goal == "land":
            self.target_pos = np.array(
                [0., 0., 0.01]) if target_pos is None else target_pos

        elif goal == "takeoff":
            self.target_pos = np.array(
                [0., 0., 10.]) if target_pos is None else target_pos
            self.init_pos = np.array(
                [0.0, 0.0, 0.1, 0.0, 0.0, 0.0]) if init_pos is None else init_pos

        else:
            print("It seems an improper goal was specified. Please input one of the following options:\n"
                  "* 'hover'\n* 'target'\n* 'land'\n* 'takeoff'")
            # Fall back to the hover target so initialization can still complete
            self.target_pos = np.array(
                [0., 0., 10.]) if target_pos is None else target_pos

        if goal != "takeoff":
            self.init_pos = np.array(
                [0.0, 0.0, 10., 0.0, 0.0, 0.0]) if init_pos is None else init_pos

        self.goal = goal

        ## Simulation
        self.sim = PhysicsSim(self.init_pos, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = action_repeat

        self.state_size = self.action_repeat * state_size
        self.action_low = action_low
        self.action_high = action_high
        self.action_size = 4  # Dimension of each action (one for each rotor)

        self.min_accuracy = min_accuracy

        self.distance_factor = distance_factor  # DEPRECATED
        self.angle_factor = angle_factor  # DEPRECATED
        self.v_factor = v_factor  # DEPRECATED
        self.rotor_factor = rotor_factor  # DEPRECATED
        self.time_factor = time_factor  # DEPRECATED
        self.elevation_factor = elevation_factor  # DEPRECATED
        self.crash_factor = crash_factor  # DEPRECATED
        self.target_factor = target_factor  # DEPRECATED

        ## Numerical representation of the distance to the target
        self.target_proximity = (abs(self.sim.pose[:3] -
                                     self.target_pos)).sum()
        self.total_proximity = []
        self.average_proximity = 0.
        self.best_proximity = np.inf
        self.previous_proximity = self.target_proximity

        ## DEPRECATED
        self.params = {
            "Distance": self.distance_factor,
            "Angle": self.angle_factor,
            "Angular V": self.v_factor,
            "Rotor": self.rotor_factor,
            "Time": self.time_factor,
            "Elevation": self.elevation_factor,
            "Crash": self.crash_factor,
            "Target": self.target_factor
        }

    def get_reward(self, previous_reward, done, rotor_speeds):
        """
        Returns a reward based on a number of positive and negative factors guiding the agent on how to perform.
        These values seemed like appropriate numbers to encourage good behavior,
        but I added scalar variables to try various relations between the weights.
        Overall, the goal was for the agent to have a negative score if it crashes, a positive score if it doesn't,
        and a very positive score if it achieves its goal.
        """
        ## Updates the distance to target and calculates total, average & best if done
        self.target_proximity = (abs(self.sim.pose[:3] -
                                     self.target_pos)).sum()
        if done:
            self.total_proximity.append(self.target_proximity)
            self.average_proximity = np.mean(self.total_proximity[-10:])
            if self.target_proximity < self.best_proximity:
                self.best_proximity = self.target_proximity

        reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)
                            ).sum()  # Default reward function

        ## The following calculations are DEPRECATED; they were dropped to simplify the reward function
        ## Subtracts the difference between the current position & target
        # reward = (1 - self.target_proximity) * self.distance_factor

        ## Penalizes when moving further from the target than before, within a margin of error
        # if self.previous_proximity < (self.target_proximity * 1.2):
        #     reward -= 50 * self.distance_factor
        # self.previous_proximity = self.target_proximity

        ## Adds a reward amount if the yaw, roll or pitch are less than about 28 degrees,
        ## Otherwise, it's a negative reward
        # for angle in self.sim.pose[3:]:
        #     if angle >= 6.3:
        #         print("Something's wrong with the angles.")
        #     if angle >= 3.1415:
        #         angle = 6.283 - angle
        #
        #     reward += (.5 - angle) * self.angle_factor

        ## Adds a reward amount if the yaw, roll or pitch velocities are less than about 28 degrees per second,
        ## Otherwise, it's a negative reward
        # for v in self.sim.angular_v:
        #     reward += (.5 - abs(v)) * self.v_factor

        ## Add a penalty if the rotors differ in speed too drastically
        # for rotor_a, rotor_b in itertools.combinations(rotor_speeds, 2):
        #     if abs(rotor_a - rotor_b) > 500:
        #         reward -= abs(rotor_a - rotor_b) * self.rotor_factor

        ## Set a time factor that rewards the agent for each timestep that it hasn't crashed
        ## Plus, it adds a major bonus if it makes it to the penultimate timestep.
        # reward += 100 * self.time_factor
        # reward = reward + (100 * self.time_factor) if self.sim.time > 4 else reward

        # ## Further penalizes deviation in elevation
        # if self.goal is not "land":
        #     reward -= abs(self.target_pos[2] - self.sim.pose[2]) * 10 * self.elevation_factor

        ## Set major reward/penalty depending on whether agent crashes early or not
        if done and np.average(abs(self.sim.pose[:3] -
                                   self.target_pos)) > self.min_accuracy * 2:
            reward -= 10000 * self.crash_factor
            # print("\nCrashed!! Current proximity: {:7.3f} || Current timestep: {:7.3f}".format(self.target_proximity, self.sim.time)) # Useful for visualizing behavior while training is running.

        ## Sets a decent reward if the agent gets close to the target to help it hone in on what works
        if self.target_proximity < self.min_accuracy * 4:
            reward += 50 * self.target_factor
            ## Set major reward if agent gets to the target location (within an acceptable minimum accuracy).
            ## And most importantly, it stops the simulation at that time, except hover, which can't stop early.
            if self.target_proximity < self.min_accuracy:
                ## Add an additional reward factor for reaching the target around the penultimate timestep,
                ## but not necessarily to rush there so quickly as to zoom past uncontrollably.
                reward += 100000 * (
                    self.sim.time /
                    (self.sim.runtime - 1)) * self.target_factor
                done = (self.goal != "hover")
                # print("\nSuccess!  (•̀ᴗ•́)و ̑̑  ")

        ## DEPRECATED
        ## Clips the reward by enforcing a floor of -1
        ## This was suggested as a fix, but did not really improve the situation for me.
        # reward = -1. if reward < -1. else reward

        return reward + previous_reward, done

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward, done = self.get_reward(reward, done, rotor_speeds)
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
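The configurable Task above picks its default start and target positions from the goal string. Below is a brief, hypothetical instantiation sketch (not from the original source) showing two goals; it assumes numpy and PhysicsSim are importable as in the other snippets.

# Usage sketch (hypothetical): instantiate the configurable Task for two goals.
import numpy as np

takeoff_task = Task(goal="takeoff", runtime=3.)            # defaults: start near the ground, target z = 10
landing_task = Task(goal="land",
                    init_pos=np.array([0., 0., 10., 0., 0., 0.]),
                    target_pos=np.array([0., 0., 0.01]),
                    min_accuracy=1.5)

state = takeoff_task.reset()                               # length: action_repeat * 6 (pose repeated)
rotor_speeds = np.full(takeoff_task.action_size, 450.)     # constant speeds inside [action_low, action_high]
next_state, reward, done = takeoff_task.step(rotor_speeds)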
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #initialize counter variables
        reward = 0
        penalty = 0

        #define current position
        current_position = self.sim.pose[:3]

        # penalty for distance from target
        penalty += abs(current_position[0] - self.target_pos[0])
        penalty += abs(current_position[1] - self.target_pos[1])
        penalty += abs(current_position[2] - self.target_pos[2])

        # offsets from the target along each axis
        x_pos = (current_position[0] - self.target_pos[0])
        y_pos = (current_position[1] - self.target_pos[1])
        z_pos = (current_position[2] - self.target_pos[2])

        distance = np.sqrt((x_pos)**2 + (y_pos)**2 + (z_pos)**2)

        # structured (cumulative) reward tiers in the hope the agent will pick up the trail:
        # a smaller distance satisfies every threshold below it, so the bonuses stack
        if distance < 5:
            reward += 10000

        if distance < 10:
            reward += 5000

        if distance < 20:
            reward += 2000

        if distance < 40:
            reward += 1000

        if distance < 60:
            reward += 500

        if distance < 100:
            reward += 250

        if distance < 200:
            reward += 100

        return reward - 0.05 * (penalty)

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class TakeOff():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Start Pos
        self.init_pose = init_pose if init_pose is not None else np.array(
            [0., 0., 0., 0., 0., 0.])

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # penalize the (capped) distance of the current pose from the target height
        reward = -min(abs(self.target_pos[2] - self.sim.pose[2]), 20.0)
        if self.sim.pose[2] >= self.target_pos[2]:
            reward += 10.0  # bonus: target height reached
        elif self.sim.time > self.sim.runtime:
            reward -= 10.0  # penalty: ran out of time before reaching the target
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #25
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 1

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()

        punish_x = abs(self.sim.pose[0] - self.target_pos[0])
        punish_y = abs(self.sim.pose[1] - self.target_pos[1])
        reward_z = 1.0 - (self.target_pos[2] - self.sim.pose[2])
        punish_rot1 = abs(self.sim.pose[3])
        punish_rot2 = abs(self.sim.pose[4])
        punish_rot3 = abs(self.sim.pose[5])
        reward_vz = self.sim.v[2]
        reward = reward_z - 0.1 * (punish_x + punish_y) - 0.1 * (punish_rot1 + punish_rot2 + punish_rot3) + \
            0.1 * reward_vz

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []

        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #26
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 12
        self.action_low = 100
        self.action_high = 900
        self.action_size = 4
        self.runtime = runtime

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

        self.upp_bounds = self.target_pos + 2
        self.low_bounds = self.target_pos - 2

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        distance = np.tanh(3. - 0.003 *
                           (abs(self.sim.pose[:3] - self.target_pos)).sum())
        #         angular_vel = abs(np.tanh(self.sim.angular_v.sum()))
        #         angles_pos = abs(np.tanh(self.sim.pose[3:].sum()))
        #         height = self.sim.pose[2]

        reward = 1 + distance

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
            pose_all.append(self.sim.v)
            pose_all.append(self.sim.angular_v)

        condition = (self.upp_bounds > self.sim.pose[:3]).all() and (
            self.sim.pose[:3] > self.low_bounds).all()
        if condition:
            print("Target reached!", end="")

            reward += 40
            done = True

        elif done:
            if self.sim.pose[2] <= self.sim.lower_bounds[2]:
                print("Crashed!", end="")
                reward -= 20
            elif (self.sim.pose[:3] <= self.sim.lower_bounds).any() or (
                    self.sim.pose[:3] >= self.sim.upper_bounds).any():
                print("Out!", end="")
            else:
                print("Time out!", end="")

        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate(
            (self.sim.pose, self.sim.v, self.sim.angular_v) *
            self.action_repeat)

        return state
Beispiel #27
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.t = 0
        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self, done):
        """Uses current pose of sim to return reward."""
        penalty = 0
        if done and self.t < 240:
            penalty = 0  # early-termination penalty is currently disabled (left at 0)
        reward = penalty + 1 - 0.01 * (
            np.linalg.norm(self.sim.pose[:3] - self.target_pos)
        )  #- .001*(np.square(self.sim.pose[3:])).sum()
        #alt_reward =  penalty + 1 - 0.01*(np.linalg.norm(self.sim.pose[:3] - self.target_pos)) - .001*(np.square(self.sim.pose[3:])).sum()
        #- .005*(abs(self.sim.v)).sum()
        #- .005*(abs(self.sim.pose[3:])).sum()
        #- .3*(abs(self.sim.angular_v)).sum() - .3*(abs(self.sim.pose[3:])).sum() -.01*(abs(self.sim.angular_v)).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            self.t += 1
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(done)
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.t = 0
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to
     the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions
            and the Euler angles
            init_velocities: initial velocity of the quadcopter in
            (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three
            Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal - Take off from (0, 0, 0) and reach position (0, 0, 150)
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 150.])  # default follows the stated goal above

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = np.tanh(1. - .03 *
                         (abs(self.sim.pose[:3] - self.target_pos))).sum()
        # Penalize crashes (episode ended before the runtime elapsed)
        if self.sim.done and self.sim.runtime > self.sim.time:
            reward += -100
        if self.sim.pose[2] >= self.target_pos[2]:
            reward = 200
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #29
0
class Takeoff():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., height=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            height: target height (z) for the takeoff
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 30

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        self.max_reward = -np.inf
        # Goal
        self.target_pos = np.array([0., 0., height]) if height is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #height_reward = np.exp(-abs(self.sim.pose[2] - self.target_pos[2]))
        
        #error = 1 * np.exp(-abs(self.sim.pose[2] - self.target_pos[2]))
        #reward = np.tanh(1 - np.exp(-1/(abs(self.sim.pose[2] - self.target_pos[2])/self.target_pos[2])))
        #reward = np.exp(-np.sqrt((abs(self.sim.pose[2] - self.target_pos[2]))))
        reward = (1-(abs(self.sim.pose[2] - self.target_pos[2]))/self.target_pos[2])
        #if reward < 0.1:
        #    reward += 20
        #self.max_reward = max(self.max_reward, reward)
        #phi_error = 0.1*(1 - np.exp(-1/self.sim.pose[3]))
        #theta_error = 0.1*(1 - np.exp(-1/self.sim.pose[4]))
        
        #phi_error = 0.000004 * (1 - abs(np.sin(self.sim.pose[3])))
        #theta_error = 0.000004 * (1 - abs(np.sin(self.sim.pose[4])))
        
        #penalty_phi = np.exp(-abs(self.sim.pose[3] - 0)*0.01)+0.5
        #penalty_theta = np.exp(-abs(self.sim.pose[4] - 0)*0.01)+0.5
        
        #reward = np.exp(-abs(self.sim.pose[3] - 0))/2 #phi
        #reward += np.exp(-abs(self.sim.pose[4] - 0))/2 #theta
        #reward = np.tanh(reward/3*2-1)
        
        return reward #+ phi_error + theta_error

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat) 
        return state
Beispiel #30
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self, init_pose=None, init_velocities=None, 
        init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime) 
        self.action_repeat = 1

        self.state_size = self.action_repeat * 2
        self.action_low = 390
        self.action_high = 420
        self.action_size = 1
        self.action_range = self.action_high - self.action_low 

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.]) 

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Reward is close to 1 near the hover spot and falls off with distance,
        # turning negative once the copter is more than 50 units away: reward = 1 - (distance / 50) ** 0.4
        
        distance = np.linalg.norm(self.sim.pose[:3] - self.target_pos)
        
        #print(self.sim.pose[:3])
        #print(self.target_pos)
        #print(distance)
    
          #if distance < 4:
        #    reward = 1 - (distance/50)**0.4
            #reward = min(1,1/(distance + 1)**2)
        #else:
        #    reward = 0
        
        reward = 1 - (distance/50)**0.4
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            #done = self.sim.next_timestep(rotor_speeds)
            #done = self.sim.next_timestep(rotor_speeds)    
            reward += self.get_reward() 
            z_norm = (self.sim.pose[2] - 10)/3
            z_v_norm = (self.sim.v[2]) / 3
            rotor_norm = (rotor_speeds[0] - self.action_low) / self.action_range * 2 - 1  # normalized rotor speed (currently not added to the state)
            pose_all.append(z_norm)
            pose_all.append(z_v_norm)
            #print(rotor_speeds)

        next_state = np.array(pose_all)
        #print(next_state)
        return next_state, reward, done
        '''
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds) # update the sim pose and velocities
            reward += self.get_reward() 
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done'''

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        
        # perturb the starting height by up to +/- perturb_unit
        perturb_unit = 2
        self.sim.pose[2] += (2*np.random.random()-1) * perturb_unit
        #print("Starting height {:7.3f}".format(self.sim.pose[2]))
        z_norm = (self.sim.pose[2] - 10)/5
        z_v_norm = 0
        rotor_norm = 0
        
        state = np.array(([z_norm, z_v_norm]) * self.action_repeat)

        return state
Beispiel #31
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        # Trying reward functions that are linear or squared sums of the difference between initial and target coordinates
        #
        # reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        # reward = 10. - ((abs(self.sim.pose[:3] - self.target_pos)**2).sum())
        # reward = 10. - 0.03*((self.sim.pose[:3] - self.target_pos).sum())**2

        # Trying a reward function that incorporates rewards for hovering along each coordinate plane, with heavier weighting towards vertical (z-axis) hovering
        xr = 0.02 * (abs(self.sim.pose[0] - self.target_pos[0]))**2 / 100
        yr = 0.02 * (abs(self.sim.pose[1] - self.target_pos[1]))**2 / 100
        zr = 0.05 * (abs(self.sim.pose[2] - self.target_pos[2]))**2 / 100
        reward = 10 - xr - yr - zr

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #32
0
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        if target_pos is None:
            print("Setting default target position")
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        #reward = (1 - 0.003 * (abs(self.sim.pose[2] - self.target_pos[2]))).sum()
        delta_x = abs(self.sim.pose[0] - self.target_pos[0])
        delta_y = abs(self.sim.pose[1] - self.target_pos[1])
        delta_z = abs(self.sim.pose[2] - self.target_pos[2])

        reward = 1 - .252 * (delta_z)
        reward = reward - (0.003 * delta_x) if delta_x > 0 else reward
        reward = reward - (0.003 * delta_y) if delta_y > 0 else reward

        return np.tanh(reward)

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
            if done:
                reward += 10
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
Beispiel #33
0
class Task:
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        return self.everything_considered()

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        done = False
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state

    # rewards ...
    def reward_speed(self):
        remain_s = self.target_pos - self.sim.pose[:3]
        target_s = remain_s / (self.sim.runtime - self.sim.time)
        delta_s = abs(self.sim.v - target_s).sum()
        return 2 - sigmoid(delta_s)

    def reward_basic(self):
        return np.tanh(1 - 0.003 * (abs(self.sim.pose[:3] - self.target_pos))).sum()

    def everything_considered(self):
        euler = self.sim.pose[3:6]
        current_pos = self.sim.pose[:3]
        start_pos = self.sim.init_pose[:3]
        speed = self.sim.v
        s_max = speed[2]  # speed in z
        s_min = speed[:2]  # speed in x, y
        angular_speed = self.sim.angular_v
        delta_position = current_pos - start_pos
        dp_max = delta_position[2]  # max this (Z)
        dp_min = delta_position[:2]  # min this (X, Y)

        euler = (abs_arctan(euler - np.pi) * 1.25).sum() / 3  # min = 0, max = 1
        angular_speed = abs_arctan_inv(angular_speed).sum() / 3  # min = 0, max = 1
        dp_min = abs_arctan_inv(dp_min).sum() / 2  # min = 0, max = 1
        dp_max = abs_arctan(dp_max)  # min = 0, max = 1
        s_min = abs_arctan_inv(s_min).sum() / 2  # min = 0, max = 1
        s_max = np.arctan(s_max) / pi2  # min = -1 , max = 1

        total = euler + angular_speed + dp_max + dp_min + s_max + s_min
        mx = 6   # maximum possible value of the sum above
        mn = -1  # minimum possible value (s_max can reach -1)
        delta = mx - mn
        reward = (total + abs(mn)) / delta  # normalize to [0, 1]
        return reward
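The reward helpers sigmoid, abs_arctan, abs_arctan_inv and pi2 used above are defined elsewhere in that project and are not shown in this snippet. The definitions below are a hypothetical reconstruction, chosen only so that each term falls into the ranges noted in the inline comments; the original helpers may differ.

# Hypothetical reconstruction of the missing helpers (assumption, not the original code).
import numpy as np

pi2 = np.pi / 2.0

def sigmoid(x):
    # standard logistic function: maps any real value into (0, 1)
    return 1.0 / (1.0 + np.exp(-x))

def abs_arctan(x):
    # |arctan(x)| scaled by pi/2 so the result lies in [0, 1)
    return np.abs(np.arctan(x)) / pi2

def abs_arctan_inv(x):
    # complement of abs_arctan: 1 at x == 0, approaching 0 for large |x|
    return 1.0 - abs_arctan(x)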