def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6  # 6-dim pose (x, y, z + 3 Euler angles) repeated action_repeat times
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 100.])
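
A quick usage sketch for the constructor above (hypothetical, since the enclosing class definition is cut off; a class named Task and the imports below are assumed):

import numpy as np

task = Task(init_pose=np.array([0., 0., 10., 0., 0., 0.]),
            target_pos=np.array([0., 0., 100.]))
print(task.state_size)   # 18: the 6-dim pose repeated action_repeat = 3 times
print(task.action_size)  # 4 rotor speeds in the range [action_low, action_high] = [0, 900]
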
Example 2
import numpy as np
from physics_sim import PhysicsSim

class DDGPTask():
    """Task (environment) that defines the goal and provides feedback to the agent."""
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=5.,
                 target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities,
                              runtime)  # PhysicsSim(): from physics_sim.py
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        reward = np.tanh(1 - 0.03 *
                         (abs(self.sim.pose[:3] - self.target_pos))).sum()

        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
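
A minimal episode loop for the DDGPTask class above (a sketch, assuming numpy and physics_sim are importable; the random rotor speeds are only a stand-in for a real agent's actions):

import numpy as np

task = DDGPTask(target_pos=np.array([0., 0., 10.]))
state = task.reset()
total_reward, done = 0.0, False
while not done:
    # stand-in action: four random rotor speeds inside the allowed range
    rotor_speeds = np.random.uniform(task.action_low, task.action_high,
                                     task.action_size)
    state, reward, done = task.step(rotor_speeds)
    total_reward += reward
print('episode return:', total_reward)
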
Example 3
import numpy as np
from physics_sim import PhysicsSim

class Task():
    """Task (environment) that defines the goal and provides feedback to the agent.
       The task is to reach a target pose.
    """
    def __init__(self,
                 init_pose=None,
                 init_velocities=None,
                 init_angle_velocities=None,
                 runtime=4.,
                 target_pos=None):
        """Initialize a Task object.
        
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
            target_v: target/goal (x,y,z) velocities for the agent (currently unused; see the commented-out goals below)
            target_angular_v: target/goal (x,y,z) angular velocities for the agent (currently unused; see the commented-out goals below)
        """

        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities,
                              init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array(
            [10., 10., 0.])
        #self.target_v = target_v if target_v is not None else np.array([0., 0., 0.])
        #self.target_angular_v = target_angular_v if target_angular_v is not None else np.array([0., 0., 0.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""

        # the distance to the goal
        dist = np.linalg.norm(self.sim.pose[:3] - self.target_pos)
        abs_dist = abs(self.sim.pose[:3] - self.target_pos).sum()

        # the speed
        #speed = np.linalg.norm(self.sim.v)
        #abs_vel = abs(self.sim.v).sum()

        # the angular speed
        #ang_speed = np.linalg.norm(self.sim.angular_v)
        #abs_ang_vel = abs(self.sim.angular_v).sum()

        rew_fct = np.tanh(1 - 0.0008 * abs_dist)  # - 0.0001 * abs_vel)
        reward = rew_fct
        return reward

        ## sample reward function:
        #reward = 1.-.003*(abs(self.sim.pose[:3] - self.target_pos)).sum()

        # the reward function - using just discounts and powers
        #rew_fct = 1 - (dist/600)**0.4

        # the reward function - using exponentials
        #dist_reward = np.exp(- .001 * abs_dist)
        #vel_reward =  np.exp(- .01 * abs_vel)
        #ang_vel_reward = np.exp(- .01 * abs_ang_vel)
        #rew_fct = dist_reward * vel_reward * ang_vel_reward

        # the reward function - using arctan
        #rew_fct = np.arctan(1/dist**2)+0.01*np.arctan(1/speed**2)+0.01*np.arctan(1/ang_speed**2)
        #rew_fct = np.arctan(1/abs_dist)+0.01*np.arctan(1/abs_vel)+0.001*np.arctan(1/abs_ang_vel)

        # the reward function - using tanh
        #rew_fct =  np.tanh(1 - 0.0001 * dist - 0.0002 * vel - 0.0002 * ang_vel)

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(
                rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
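
To see how the active tanh reward above shapes learning, here is a rough check (illustration only, not part of the task code): with abs_dist the summed absolute distance to the target, np.tanh(1 - 0.0008 * abs_dist) starts near 0.76 at the goal, crosses zero at abs_dist = 1 / 0.0008 = 1250, and saturates toward -0.76 far away.

import numpy as np

for abs_dist in [0.0, 100.0, 625.0, 1250.0, 2500.0]:
    print(abs_dist, np.tanh(1 - 0.0008 * abs_dist))
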
Example 4
import numpy as np
from physics_sim import PhysicsSim

# current_pos = np.array([1., 1., 1.])
# target_pos = np.array([10., 10., 10.])

# dist = np.linalg.norm(current_pos - target_pos)
# print(dist)

runtime = 5
init_pose = np.array([0., 0., 0., 0., 0., 0.])  # initial pose
init_velocities = np.array([0., 0., 0.])  # initial velocities
init_angle_velocities = np.array([0., 0., 0.])  # initial angle velocities
target_pos = np.array([-3., 5., 10.])

# dones = np.array([True, False, True])
# print((1 - dones))

sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
last_dist = np.linalg.norm(target_pos - init_pose[:3])

# action = np.array([0, 0, 0, 0])
# action = action + np.array([-100, 0, 1, 904.5])
# print(action)
# print(np.clip(action, 0, 900))

last_pose = sim.pose[:3]
for i in range(200):
    # action = np.random.uniform(400, 900, 4)
    action = np.array([890, 900, 900, 900])
    # sim.reset()
    done = sim.next_timestep(action)  # update the sim pose and velocities
    print('done: ' + str(done))
    target_vec = target_pos - sim.pose[:3]