# Shared module context assumed by all of the Task variants below.
import math

import numpy as np

from physics_sim import PhysicsSim


class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None,
                 action_repeat=1):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = action_repeat
        self.state_size = self.action_repeat * 6
        self.action_low = 1
        self.action_high = 900
        self.action_size = self.action_repeat * 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 10.
        if self.sim.pose[2] < (self.target_pos[2] - 3.):
            # Below the target band: reward climbing, penalize horizontal drift.
            reward += 2 * (self.sim.pose[2] - self.sim.init_pose[2])
            reward -= (abs(self.sim.pose[:2] - self.target_pos[:2]) > 3.).sum()
        else:
            # Near or above the target band: bonus, minus per-axis drift penalties.
            reward += 10.
            reward -= 2 * (abs(self.sim.pose[:3] - self.target_pos) > 3.).sum()
            reward -= np.int32(abs(self.sim.pose[2] - self.target_pos[2]) > 3.)
        if self.sim.done and self.sim.time < self.sim.runtime:
            # Episode ended early (crash or out of bounds).
            reward -= 10
        reward = reward / 10
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for i in range(self.action_repeat):
            # Each repeat consumes its own slice of four rotor speeds.
            done = self.sim.next_timestep(rotor_speeds[i * 4:(i * 4) + 4])  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, (reward / self.action_repeat), done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
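# A minimal sketch of how any of these Task variants plugs into an episode
# loop. Assumptions: PhysicsSim comes from the Udacity quadcopter project's
# physics_sim module, and the uniform-random policy is a placeholder only.
def run_random_episode():
    task = Task(init_pose=np.array([0., 0., 1., 0., 0., 0.]), runtime=5.)
    state = task.reset()
    total_reward, done = 0., False
    while not done:
        # Sample rotor speeds uniformly within the task's action bounds.
        action = np.random.uniform(task.action_low, task.action_high, task.action_size)
        state, reward, done = task.step(action)
        total_reward += reward
    return total_reward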
class TakeOff_Task():
    """TakeOff (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_size = 4
        self.action_low = 0
        self.action_high = 900

        # Goal: take off, starting from the ground
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Keep the Euler angles stable by passing each through a sine function;
        # every factor lies in [0, 1], so the product shrinks with any tilt.
        optimal_reward = (1. - abs(math.sin(self.sim.pose[3])))
        optimal_reward *= (1. - abs(math.sin(self.sim.pose[4])))
        optimal_reward *= (1. - abs(math.sin(self.sim.pose[5])))

        # Distance from the target position.
        delta = abs(self.sim.pose[:3] - self.target_pos[:3])
        # The dot product of delta with itself is non-negative, so the square root is safe.
        distance = math.sqrt(np.dot(delta, delta))
        penalty = distance if distance > 0.01 else 0

        reward = 1. - penalty
        # Scale by the Euler-angle stability factor.
        reward *= optimal_reward
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1. - .001 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        # distance = np.sqrt(((self.sim.pose[:3] - self.target_pos)**2).sum())
        # if distance < .15:
        #     reward += 0.03

        # proximity bonus along the z-axis
        if (abs(self.sim.pose[2] - self.target_pos[2])) < 0.20:
            reward += 0.03
        # proximity bonuses along the x- and y-axes
        if (abs(self.sim.pose[0] - self.target_pos[0])) < 0.20:
            reward += 0.03
        if (abs(self.sim.pose[1] - self.target_pos[1])) < 0.20:
            reward += 0.03
        # heavy penalty for ending the episode early (crash / out of bounds)
        if self.sim.time < self.sim.runtime and self.sim.done:
            reward -= 15
        return reward

    def step(self, rotor_speeds):
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 1
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = -1
        if self.sim.pose[2] >= self.target_pos[2]:
            reward = 50
        elif self.sim.pose[2] <= 0:
            reward = -50
        else:
            # reward = -abs(self.sim.pose[2] - self.target_pos[2])
            # reward = self.sim.pose[2]
            reward += self.sim.v[2]
            reward -= 0.3 * abs(self.sim.pose[:2]).sum()
            # reward -= 0.2 * abs(self.sim.pose[3:]).sum()
            # reward -= 0.1 * abs(self.sim.pose[:2]).sum()
            # reward += max(self.sim.v[2], 5) if self.sim.v[2] > 0 else self.sim.v[2]
            # print(self.sim.pose, self.sim.v)
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            if self.sim.pose[2] >= self.target_pos[2]:
                done = True
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    def __init__(self, target_pos, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., pos_noise=None,
                 ang_noise=None, vel_noise=None, ang_vel_noise=None):
        self.target_pos = target_pos
        self.pos_noise = pos_noise
        self.ang_noise = ang_noise
        self.vel_noise = vel_noise
        self.ang_vel_noise = ang_vel_noise

        # These are going to get changed a lot
        hover = 400

        # Set low/high for actions
        self.action_high = 1.2 * hover
        self.action_low = 0.99 * hover
        self.action_size = 1

        # Init the velocities to blank defaults if not given to us
        if init_velocities is None:
            init_velocities = np.array([0.0, 0.0, 0.0])
        if init_angle_velocities is None:
            init_angle_velocities = np.array([0.0, 0.0, 0.0])

        # Here we create a physics simulation for the task
        # print("start", init_pose, init_velocities, init_angle_velocities)
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.state_size = len(self.get_state())

        # tanh constants mapping a [-1, 1] action onto [action_low, action_high]
        self.action_b = (self.action_high + self.action_low) / 2.0
        self.action_m = (self.action_high - self.action_low) / 2.0

    def get_reward(self):
        loss = (self.sim.pose[2] - self.target_pos[2])
        loss = loss**2
        loss += 0.1 * (self.sim.linear_accel[2]**2)
        delta = 0.5
        reward_max = 1
        reward_min = 0
        # Calculate a clipped pseudo-Huber reward from the loss
        reward = np.maximum(
            reward_max - delta * delta * (np.sqrt(1 + (loss / delta)**2) - 1),
            reward_min)
        return reward

    def normalize_angles(self, angles):
        # Wrap angles into the (-pi, pi] range
        normalized_angles = np.copy(angles)
        for i in range(len(normalized_angles)):
            while normalized_angles[i] > np.pi:
                normalized_angles[i] -= 2 * np.pi
            while normalized_angles[i] <= -np.pi:
                normalized_angles[i] += 2 * np.pi
        return normalized_angles

    def get_state(self):
        position_error = (self.sim.pose[:3] - self.target_pos)
        return np.array([position_error[2], self.sim.v[2], self.sim.linear_accel[2]])

    def convert_action(self, action):
        # print(action, self.action_m, self.action_b)
        return (action[0] * self.action_m) + self.action_b

    def step(self, action):
        speed_of_rotors = self.convert_action(action)
        is_done = self.sim.next_timestep(speed_of_rotors * np.ones(4))
        next_state = self.get_state()
        reward = self.get_reward()
        if reward <= 0:
            is_done = True
        return next_state, reward, is_done

    def reset(self):
        self.sim.reset()

        if self.action_size == 1:
            # Single-action variant: only perturb the vertical components.
            if self.pos_noise is not None or self.ang_noise is not None:
                new_random_pose = np.copy(self.sim.init_pose)
                if self.pos_noise is not None and self.pos_noise > 0:
                    new_random_pose[2] += np.random.normal(0.0, self.pos_noise, 1)
                self.sim.pose = np.copy(new_random_pose)
            if self.vel_noise is not None:
                new_velocity_random = np.copy(self.sim.init_velocities)
                new_velocity_random[2] += np.random.normal(0.0, self.vel_noise, 1)
                self.sim.v = np.copy(new_velocity_random)
            return self.get_state()

        # Full variant: perturb position and orientation...
        if self.pos_noise is not None or self.ang_noise is not None:
            new_random_pose = np.copy(self.sim.init_pose)
            if self.pos_noise is not None and self.pos_noise > 0:
                new_random_pose[:3] += np.random.normal(0.0, self.pos_noise, 3)
            if self.ang_noise is not None and self.ang_noise > 0:
                new_random_pose[3:] += np.random.normal(0.0, self.ang_noise, 3)
            self.sim.pose = np.copy(new_random_pose)

        # ...velocity...
        if self.vel_noise is not None:
            new_velocity_random = np.copy(self.sim.init_velocities)
            new_velocity_random += np.random.normal(0.0, self.vel_noise, 3)
            self.sim.v = np.copy(new_velocity_random)

        # ...and angular velocity.
        if self.ang_vel_noise is not None:
            angle_velocity_random = np.copy(self.sim.init_angle_velocities)
            angle_velocity_random += np.random.normal(0.0, self.ang_vel_noise, 3)
            self.sim.angular_v = np.copy(angle_velocity_random)

        return self.get_state()
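# Sanity check of the clipped pseudo-Huber reward used above: quadratic near
# zero loss, roughly linear farther out, clipped into [0, 1]. Standalone
# re-implementation for illustration, using the same constants.
def pseudo_huber_reward(loss, delta=0.5, reward_max=1., reward_min=0.):
    return np.maximum(
        reward_max - delta * delta * (np.sqrt(1 + (loss / delta)**2) - 1),
        reward_min)

# pseudo_huber_reward(0.0) -> 1.0, (0.25) -> ~0.97, (1.0) -> ~0.69, (4.0) -> 0.0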
class Takeoff():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])
        self.start_pos = init_pose[:3] if init_pose is not None else np.array([0., 0., 10.])
        self.start_xyz_dist = np.linalg.norm(self.target_pos - self.start_pos)
        self.start_z_dist = np.linalg.norm(
            np.array([0., 0., self.target_pos[2]]) - np.array([0., 0., self.start_pos[2]]))

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        curr_pos = self.sim.pose[:3]
        curr_z = np.array([0., 0., curr_pos[2]])
        target_z = np.array([0., 0., self.target_pos[2]])
        curr_z_dist = np.linalg.norm(target_z - curr_z)
        # Negative ratio of remaining vertical distance; scaled down once the
        # copter has closed some of the initial gap.
        z_ratio = -(curr_z_dist / self.start_z_dist)
        z_reward = (z_ratio * 0.1) if curr_z_dist < self.start_z_dist else z_ratio
        # Small bonus for climbing faster than 10 m/s, capped at 0.005.
        speed_reward = min(0.005, max(0., (self.sim.v[2] - 10.) / 100.))
        reward = z_reward + speed_reward
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward / self.action_repeat, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        self.prev_pos = np.copy(self.start_pos)  # track previous position (unused by the current reward)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Capture init_pose for use in the reward function
        self.init_pose = init_pose if init_pose is not None else np.array([0., 0., 0., 0., 0., 0.])

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Originally provided reward function:
        # reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()

        # Reward design philosophy:
        # The reward should increase as the copter gets closer to its target
        # position (target_pos). To do this, normalize the remaining distance
        # by the total distance, subtract this value from 1, then multiply by
        # a scaling factor. Punish rotations and x/y shifts from zero by
        # applying a penalty.

        # Distance between the current position (self.sim.pose[:3]) and the target position
        distance_remaining = np.sqrt(np.sum(
            [(x - y)**2 for x, y in zip(self.sim.pose[:3], self.target_pos)]))
        # Distance between the starting position and the target position,
        # used to normalize the reward
        distance_total = np.sqrt(np.sum(
            [(x - y)**2 for x, y in zip(self.init_pose[:3], self.target_pos)]))

        proximity = 1 - distance_remaining / distance_total
        proximity_reward = 10 * proximity if proximity > 0 else 0

        # Punish rotation
        rotation_punish = sum([1 if ii > 0.03 else 0 for ii in abs(self.sim.pose[3:])])
        # Punish shift
        shift_punish = sum([1 if ii > 1 else 0 for ii in abs(self.sim.pose[:2])])

        reward = proximity_reward - rotation_punish - shift_punish
        reward = reward / 3  # rescale due to x3 action repeat
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
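# Worked example of the proximity term above, with hypothetical positions
# (not taken from the sim): starting at the origin, target 10 m straight up.
def proximity_reward_example():
    init, target = np.array([0., 0., 0.]), np.array([0., 0., 10.])
    current = np.array([0.5, -0.3, 6.0])
    distance_total = np.linalg.norm(init - target)         # 10.0
    distance_remaining = np.linalg.norm(current - target)  # ~4.04
    proximity = 1 - distance_remaining / distance_total    # ~0.60
    return 10 * proximity if proximity > 0 else 0          # ~5.96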
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])
        self.old_pos = self.sim.pose

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Position-based alternatives, kept for reference:
        # reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        # reward = -min(abs(self.target_pos[2] - self.sim.pose[2]), 20.0)
        # if self.sim.pose[2] >= self.target_pos[2]:
        #     reward += 10.0

        # horizontal-plane distance
        xy_distance = np.sqrt(((self.target_pos[:2] - self.sim.pose[:2])**2).sum())
        # horizontal-plane speed
        # xy_velocity = np.sqrt((self.sim.v[:2]**2).sum())
        # vertical distance
        z_distance = abs(self.target_pos[2] - self.sim.pose[2])
        # vertical velocity
        z_velocity = self.sim.v[2]
        # distance moved in the xy-plane since the previous position
        # xy_move = np.sqrt(((self.old_pos[:2] - self.sim.pose[:2])**2).sum())
        # vertical distance moved since the previous position (currently unused)
        z_move = self.sim.pose[2] - self.old_pos[2]
        # total angular speed
        xyz_angular_v = (abs(self.sim.angular_v[:3])).sum()

        reward = -2.0 * z_distance + 4.0 * z_velocity - 4.0 * xyz_angular_v - xy_distance
        # reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            self.old_pos = self.sim.pose
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.old_pos = None
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 5
        self.reset()
        self.state_size = self.state.size
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.t = 0

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self, done, rotor_speeds, in_bounds):
        """Uses current pose of sim to return reward."""
        penalty = 10
        # takeoff task
        reward = (1 - penalty * (not in_bounds)
                  - (1e-2) * np.linalg.norm(self.sim.pose[:3] - self.target_pos))
        # - (1e-4)*(np.sum(self.sim.pose[3:]**2) + np.sum(self.sim.angular_v**2))
        # float task
        # reward = 1 - penalty*(not in_bounds) - (1e-4)*(np.sum(self.sim.pose[3:]**2) + np.sum(self.sim.angular_v**2))
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        # pose_all = []
        for _ in range(self.action_repeat):
            self.t += 1
            # update the sim pose and velocities
            done = self.sim.next_timestep(rotor_speeds)
            reward += self.get_reward(done, rotor_speeds, self.sim.in_bounds)
            # pose_all.append(self.sim.pose)
        # next_state = np.concatenate(pose_all)
        next_state = np.concatenate((self.sim.pose, self.sim.v, self.sim.angular_v))
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.t = 0
        self.sim.reset()
        # self.state = np.concatenate([self.sim.pose] * self.action_repeat)
        self.state = np.concatenate((self.sim.pose, self.sim.v, self.sim.angular_v))
        return self.state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # last position
        self.pose_last = self.sim.pose[:3]

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Earlier experiments, kept for reference:
        # reward = 1.-0.3*abs(self.sim.pose[:3] - self.target_pos).sum()
        # distance = np.linalg.norm(self.target_pos - self.sim.pose[:3])
        # distance_last = np.linalg.norm(self.target_pos - self.pose_last)
        # reward = 0. + (distance_last - distance)
        # pose_diff = self.target_pos - self.sim.pose[:3]
        # e_R = pose_diff/np.linalg.norm(pose_diff)
        # e_v = self.sim.v/np.linalg.norm(self.sim.v)
        # reward = np.dot(self.sim.v, e_R)
        # reward = 1.-0.003*np.linalg.norm(self.target_pos - self.sim.pose[:3])
        # reward = np.tanh(1.-.003*abs(self.sim.pose[:3] - self.target_pos)).sum()
        # reward = np.tanh(1.-.003*abs(self.sim.pose[:3] - self.target_pos).sum())
        reward_xy = np.tanh(1. - .01 * abs(self.sim.pose[:2] - self.target_pos[:2])).sum()
        reward_z = np.tanh(1. - .005 * abs(self.sim.pose[2] - self.target_pos[2]))
        reward = reward_xy + reward_z
        # reward = np.tanh(1.-.009*np.linalg.norm(self.sim.pose[:3] - self.target_pos))
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            self.pose_last = self.sim.pose[:3]  # record last position
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 1
        # Compose the observation from these PhysicsSim attributes.
        self._states_to_be_used = ["pose", "linear_accel", "angular_v"]
        self.state_size = self.action_repeat * sum(
            [len(getattr(self.sim, x)) for x in self._states_to_be_used])
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_states(self):
        return [getattr(self.sim, x) for x in self._states_to_be_used]

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # position_score = max(10.-.3*np.linalg.norm(self.sim.pose[:3] - self.target_pos), -10)
        position_score = 10. - .3 * np.linalg.norm(self.sim.pose[:3] - self.target_pos)**2
        # angular_stationary_score = max(10.-.3*np.linalg.norm(self.sim.angular_v), -10)
        angular_stationary_score = 10. - .3 * np.linalg.norm(self.sim.angular_v)**2
        reward = (position_score + .1 * angular_stationary_score) / 10.
        # reward = np.sqrt(np.abs(reward)) * np.sign(reward)
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(np.concatenate(self.get_states()))
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate(self.get_states() * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 300
        self.action_high = 500
        self.action_size = 4

        # Goal
        if target_pos is None:
            print("Setting default target position")
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # reward = -min(abs(self.target_pos[2] - self.sim.pose[2]), 20.0)
        # reward = zero for matching target z, -ve as you go farther, up to -20
        # z_reward = np.tanh(1 - 0.003*(abs(self.sim.pose[2] - self.target_pos[2]))).sum()
        # xy_reward = np.tanh(1 - 0.009*(abs(self.sim.pose[:2] - self.target_pos[:2]))).sum()
        # reward = z_reward + xy_reward
        reward = 0.0
        sim = self.sim
        reward += 0.01 * sim.pose[2]                      # height
        reward -= 0.02 * (abs(sim.pose[:2])).sum()        # horizontal drift
        reward -= 0.025 * (abs(sim.pose[5]))              # yaw
        reward += 0.005 * sim.v[2]                        # vertical velocity
        reward -= 0.01 * (abs(sim.v[:2])).sum()           # horizontal velocity
        reward -= 0.025 * (abs(sim.angular_v[:3])).sum()  # angular velocity
        # note: exact float equality, so this bonus fires only when the yaw
        # angle or yaw rate is exactly zero
        if sim.angular_v[2] == 0 or sim.pose[5] == 0:
            reward += 100
        return reward / 100

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 410
        self.action_high = 420
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self, done):
        """Uses current pose of sim to return reward."""
        down_penalty = 0
        vertical_coeff = 0.05
        if self.sim.pose[2] < 0.5:
            down_penalty = -10
        vert_pos = np.linalg.norm(self.sim.pose[2] - self.target_pos[2])
        if vert_pos < 2:
            # soften the vertical penalty once the copter nears the target height
            vertical_coeff = 0.01
        rot_pos = abs(self.sim.pose[3:]).sum()
        x_y_pos = np.linalg.norm(self.sim.pose[:2] - self.target_pos[:2])
        return (1 - vertical_coeff * vert_pos - .001 * x_y_pos
                - .001 * rot_pos + down_penalty)

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(done)
            pose_all.append(self.sim.pose)
            # if self.sim.pose[2] > 10.0:
            #     done = True
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def get_reward_enhanced(self):
        """Uses current pose of sim to return reward."""
        # penalty grows with the angular velocities, so we maintain stability
        stability_score = 100. - .4 * abs(self.sim.angular_v[:3]).sum()
        # the greater the distance, the greater the penalty
        position_score = 100. - .6 * np.linalg.norm(self.sim.pose[:3] - self.target_pos)**2
        reward = stability_score + position_score
        return reward

    def step(self, rotor_speeds, enhanced=False):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            if enhanced:
                reward += self.get_reward_enhanced()
            else:
                reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
# This variant's reward uses scikit-learn's cosine similarity.
from sklearn.metrics.pairwise import cosine_similarity


class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6  # each repeated observation is a 6-element pose
        self.action_low = 300
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Cosine similarity between the current position and the target position
        [[reward]] = cosine_similarity(self.sim.pose[:3].reshape(1, -1),
                                       self.target_pos.reshape(1, -1))
        # print('reward', reward)
        # reward = np.linalg.norm(self.sim.pose[:3] - self.target_pos)
        # reward = 1. - 0.3 * abs(self.sim.pose[:3] - self.target_pos).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        # print('next_state', next_state)
        # print('angular_accels', self.sim.angular_accels)
        # print('angular_v', self.sim.angular_v)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
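# Caveat worth noting about the reward above: cosine similarity measures only
# the angle between the position vector and the target vector, so any point
# along the ray through the target scores 1.0 regardless of distance.
# Illustration with hypothetical positions:
def cosine_reward_caveat():
    target = np.array([[0., 0., 10.]])
    for pos in ([0., 0., 1.], [0., 0., 100.], [5., 5., 5.]):
        # [0,0,1] and [0,0,100] both score 1.0 despite very different heights;
        # [5,5,5] scores ~0.58.
        print(pos, cosine_similarity(np.array([pos]), target)[0, 0])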
class CustomTask():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = np.tanh(1. - .01 * (abs(self.sim.pose[:3] - self.target_pos))).sum()

        loss_x = abs(self.sim.pose[0] - self.target_pos[0])**2
        loss_y = abs(self.sim.pose[1] - self.target_pos[1])**2
        loss_z = abs(self.sim.pose[2] - self.target_pos[2])**2
        threshold = 0.10
        distance = np.sqrt(loss_x + loss_y + loss_z)
        if distance < threshold:
            reward += 0.2
        if self.sim.time > self.sim.runtime:
            # agent has run out of time
            reward -= 10.0  # extra penalty
        if self.sim.pose[2] >= self.target_pos[2]:
            # agent has crossed the target height
            reward += 10.0  # bonus reward
        return np.tanh(reward)

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
# This variant prints debug output; copy and sys are used below.
import copy
import sys


class Takeoff:
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, debug=False):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            debug: print velocity and attitude on every reward computation
        """
        self.sim = PhysicsSim(init_pose, None, None, 5.0)
        self.action_repeat = 1
        self.state_size = self.action_repeat * 5
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.debug = debug

    def normalize_angles(self, euler_angles):
        """Normalize Euler angles from the [0, 2*pi) range to [-1, 1]."""
        angles = copy.copy(euler_angles)
        for ii in range(len(angles)):
            angles[ii] = angles[ii] if (angles[ii] < np.pi) else (angles[ii] - 2 * np.pi)
            angles[ii] /= np.pi
        return angles

    def get_reward(self, rotor_speeds):
        """Uses current pose of sim to return reward."""
        # Normalize roll, pitch and yaw to [-1, 1]
        norm_eulers = self.normalize_angles(self.sim.pose[3:6])
        if self.debug:
            # Print latest velocity and normalized Euler angles
            print("(x',y',z')=({:6.2f},{:6.2f},{:6.2f}), (phi,theta,psi)=({:6.2f},{:6.2f},{:6.2f})"
                  .format(*self.sim.v[:3], *norm_eulers))
            sys.stdout.flush()

        # Cost of excessive roll or pitch, normalized to [-5, 0].
        attitude_cost = -(5 / 2) * sum(abs(norm_eulers[:2]))

        # Cost of lateral movement, ignored for now.
        lateral_cost = -sum(abs(self.sim.v[0:2]))

        # Reward for z-speed up minus the cost of rolling or pitching
        reward = self.sim.v[2] + attitude_cost
        # reward = self.sim.v[2] + 1./np.std(rotor_speeds) + lateral_cost + attitude_cost

        # Reward for this step plus debugging info
        return reward, lateral_cost, attitude_cost

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = np.zeros(3)  # reward plus the two debug cost components
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += np.array(self.get_reward(rotor_speeds))
            # pose_all.append(self.sim.pose)
            pose_all.append(np.hstack([self.sim.v, self.sim.pose[3:5]]))
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        # state = np.concatenate([self.sim.pose] * self.action_repeat)
        state = np.concatenate([np.hstack([self.sim.v, self.sim.pose[3:5]])] * self.action_repeat)
        return state
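# Quick check of the [0, 2*pi) -> [-1, 1] angle normalization used above;
# standalone vectorized re-implementation for illustration.
def normalize_angles_demo():
    angles = np.array([0.0, np.pi / 2, 3 * np.pi / 2])
    wrapped = np.where(angles < np.pi, angles, angles - 2 * np.pi)
    return wrapped / np.pi  # -> [0., 0.5, -0.5]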
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Added: success flag for the takeoff goal
        self.success = False

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Check whether it has reached the target height
        if self.sim.pose[2] >= self.target_pos[2]:
            self.success = True
            return 1
        # gain a better reward the closer it comes to the target height
        reward = 1. - min(self.target_pos[2] - self.sim.pose[2], 100) / 100
        # reward = 1.-.3*(abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        # Modified to teach the quadcopter to take off to a target height
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        # the agent receives a reward of 10 if it reaches the target height
        # and a penalty of -10 if it does not
        if done:
            if self.success:
                reward += 10
            else:
                reward -= 10
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        # reset status
        self.success = False
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, obs='pos', init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            obs: 'pos', 'pos_v', 'pos_v_ang'
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # make a sim object
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.obs = obs
        self.action_repeat = 3

        # task's observation size
        if self.obs == 'pos':
            s_len = 6   # position [x, y, z, phi, theta, psi]
        elif self.obs == 'pos_v':
            s_len = 9   # position + velocity [vx, vy, vz]
        elif self.obs == 'pos_v_ang':
            s_len = 12  # position + velocity + angular velocity [vphi, vtheta, vpsi]
        self.state_size = self.action_repeat * s_len
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4  # 4 rotor speeds

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Reward function for the "takeoff" task."""
        # difference vector
        delta = self.sim.pose[:3] - self.target_pos
        # Euclidean distance to the target
        dtotal = np.sqrt(np.dot(delta, delta))
        # absolute distance in z (height) to the target
        dz = np.abs(delta[2])

        # reward dtotal (300 m is the env bound of the sim);
        # distance_reward and time_reward are helpers defined elsewhere in this module
        reward = distance_reward(dtotal, dmax=np.sqrt(300**2 + 300**2 + 300**2)) \
            * time_reward(self.sim.time, tmax=self.sim.runtime)

        # success condition: height
        if dz < 1:
            self.sim.done = True  # end the episode (nothing further to learn once the target is reached)
            reward += 100  # an extra big reward (to avoid whirling around the target to collect positive reward)
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        stmp = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            if self.obs == 'pos':
                s = np.array(self.sim.pose)  # shape (6,)
            elif self.obs == 'pos_v':
                s = np.concatenate((self.sim.pose, self.sim.v))  # shape (9,)
            elif self.obs == 'pos_v_ang':
                s = np.concatenate((self.sim.pose, self.sim.v, self.sim.angular_v))  # shape (12,)
            stmp.append(s)
        next_state = np.concatenate(stmp)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        if self.obs == 'pos':
            s = np.array(self.sim.pose)  # shape (6,)
        elif self.obs == 'pos_v':
            s = np.concatenate((self.sim.pose, self.sim.v))  # shape (9,)
        elif self.obs == 'pos_v_ang':
            s = np.concatenate((self.sim.pose, self.sim.v, self.sim.angular_v))  # shape (12,)
        state = np.concatenate([s] * self.action_repeat)
        return state
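# distance_reward and time_reward are helpers defined elsewhere in this
# variant's module and are not part of the snippet. A plausible sketch,
# purely an assumption, consistent with how they are called above (both
# mapping into [0, 1]):
def distance_reward(d, dmax):
    # Assumed shape: 1 at the target, decaying linearly to 0 at the env bound.
    return max(0., 1. - d / dmax)

def time_reward(t, tmax):
    # Assumed shape: reaching the target early is worth more than late.
    return max(0., 1. - t / tmax)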
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal height (z) for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal (scalar target height)
        if target_pos is None:
            print("Setting default target height")
            target_pos = 10.0
        self.target_pos = target_pos
        self.max_duration = 10.0  # sec

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        height_error = self.target_pos - self.sim.pose[2]
        base = np.tanh(1 - 0.0005 * abs(height_error))
        if height_error < 0:
            # Above the target: small reward for a slight overshoot,
            # larger reward when well past it.
            reward = base / 5 if height_error > -3 else base * 2
        else:
            # At or below the target.
            reward = base
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
            if done:
                # reward += 10
                if self.sim.pose[2] >= self.target_pos:
                    # agent has crossed the target height
                    reward += 7.0  # bonus reward
                if self.sim.time > self.max_duration:
                    # agent has run out of time
                    reward -= 10.0  # extra penalty
                if self.sim.time < self.max_duration:
                    # agent finished before the time limit
                    reward += 3.0  # bonus reward
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])
        self.task_success = False

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        return -1.0 * abs(self.sim.pose[2] - self.target_pos[2])

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            # Terminal rewards / penalties for task completion / failure
            epsilon = 2.0
            diff = abs(self.sim.pose[2] - self.target_pos[2])
            if diff <= epsilon:
                reward += 1000
                self.task_success = True
            elif diff > epsilon and done:
                reward -= 1000
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.task_success = False
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pose=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pose: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.init_z = init_pose[2] if init_pose is not None else 0.0

        # Goal
        self.target_pose = target_pose if target_pose is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward. Designed for takeoff."""
        # punish_x = np.tanh(abs(self.sim.pose[0] - self.target_pose[0]))
        # punish_y = np.tanh(abs(self.sim.pose[1] - self.target_pose[1]))
        reward_z = 3 * np.tanh(self.sim.pose[2] - self.init_z)
        punish_rot1 = np.tanh(abs(self.sim.pose[3]))
        punish_rot2 = np.tanh(abs(self.sim.pose[4]))
        punish_rot3 = np.tanh(abs(self.sim.pose[5]))
        reward = reward_z - punish_rot1 - punish_rot2 - punish_rot3

        dist = np.linalg.norm(self.target_pose[:3] - self.sim.pose[:3])
        if dist < 3:
            reward += 10 * np.tanh(dist)
        else:
            reward -= np.tanh(dist)
        reward -= np.tanh(np.linalg.norm(self.sim.v))
        reward -= np.tanh(np.linalg.norm(self.sim.angular_v))
        if self.sim.v[2] > 0:
            reward += np.tanh(self.sim.v[2])
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward_new(self):
        """Uses current pose of sim to return reward."""
        reward = 0.0
        # Reward positive velocity along the z-axis
        reward += self.sim.v[2]
        # Reward positions close to the target along the z-axis
        reward -= (abs(self.sim.pose[2] - self.target_pos[2])) / 2.0
        # A lower sensitivity towards drifting in the xy-plane
        reward -= (abs(self.sim.pose[:2] - self.target_pos[:2])).sum() / 4.0
        # Penalize angular velocity
        reward -= (abs(self.sim.angular_v[:3])).sum()
        return reward

    def step_new(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward_new()
            pose_all.append(self.sim.pose)
            if self.sim.pose[2] >= self.target_pos[2]:
                reward += 100
                done = True
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
# This variant uses bare sqrt/exp from the math module.
from math import exp, sqrt


class HoverTask(Task):
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, min_height=1.,
                 reward_weights=None, runtime=5.):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            min_height: minimum height to maintain in meters
            runtime: time limit for each episode in seconds
        """
        # Simulation
        self.init_pose = init_pose if init_pose is not None else np.array([0., 0., 10., 0., 0., 0.])
        self.init_velocities = init_velocities if init_velocities is not None else np.zeros(3)
        self.init_angle_velocities = init_angle_velocities if init_angle_velocities is not None else np.zeros(3)
        self.sim = PhysicsSim(self.init_pose, self.init_velocities, self.init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 12  # 6 pose + 3 velocity + 3 angular velocity
        self.action_low = 0     # Min RPM value
        self.action_high = 900  # Max RPM value
        self.action_size = 4    # Num actions (4 rotors)

        # Goal
        self.target_pos = init_pose[0:3] if init_pose is not None else np.array([0., 0., 10.])
        # Weights are kept for experimentation; the current reward does not use them.
        self.reward_weights = reward_weights if reward_weights is not None else np.array([10., 0., -2., -1.])

    # Helpful for debugging
    # def get_reward_components(self):
    #     norm = lambda a: np.sum(np.square(a))
    #     return 1, norm(self.sim.pose[0:3]-self.target_pos), norm(self.sim.v), norm(self.sim.angular_v)

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        height_diff = abs(self.sim.pose[2] - self.init_pose[2])
        vert_v = abs(self.sim.v[2])
        spin_v = sqrt(np.sum(np.square(self.sim.angular_v)))
        out_of_bounds = np.any(np.concatenate((
            np.less_equal(self.sim.pose[:3], self.sim.lower_bounds),
            np.greater_equal(self.sim.pose[:3], self.sim.upper_bounds))))
        # Computed but not applied in the current reward.
        oob_penalty = 100 if out_of_bounds else 0
        reward = 1. / (1 + exp(-height_diff)) + 1. / (1 + exp(-vert_v / 10.)) - spin_v / 100.
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        # Clip rotor speeds to the action bounds
        rotor_speeds = np.clip(rotor_speeds, self.action_low, self.action_high)
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(np.concatenate((self.sim.pose, self.sim.v, self.sim.angular_v)))  # save pose, v and angular_v
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate(
            [np.concatenate((self.sim.pose, self.sim.v, self.sim.angular_v))] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None,
                 init_angle_velocities=None, runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3
        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 0.

        ## Add the vertical velocity to the reward: if the quadcopter climbs
        ## the reward increases, and if it descends the reward decreases.
        reward += self.sim.v[2]

        ## Check x and y position movement
        pos_xy = 1 - 2 * (abs(self.sim.pose[:2] - self.target_pos[:2])).sum()
        if pos_xy < -1:
            ## Penalty if the x and y position has drifted too much
            reward -= 1
        elif pos_xy == 1:
            ## Reward for staying exactly vertical at x0 and y0
            reward += 2
        else:
            ## Small x/y drift counts as well-maintained x and y position
            reward += 1

        ## Check the angular velocities: if they are irregular enough to
        ## disrupt the quadcopter's vertical ascent, give a negative reward;
        ## otherwise, a positive reward.
        av = 1 - 2 * (abs(self.sim.angular_v[:3])).sum()
        if av < -1:
            reward -= 1
        else:
            reward += 1

        ## Check the vertical distance between the current and target positions.
        z_distance = 1 - 2 * (abs(self.sim.pose[2] - self.target_pos[2]))
        if z_distance < -1:
            ## more than 1 m from the target height: negative reward
            reward -= 1
        elif z_distance > 0:
            ## within 0.5 m of the target height: big positive reward
            reward += 5
        else:
            ## closing in on the target height: positive reward
            reward += 2
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        if self.sim.pose[2] > self.target_pos[2]:
            reward += 200
            done = True
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None, init_angle_velocities=None,
                 runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 1
        self.action_high = 900
        self.action_size = 4
        self.runtime = runtime

        '''
        Ideas to try:
        - limit the action space and state space to vertical motion only
        - set the rotor speeds equal to each other if possible
        - tweak the neural network in model.py
        '''

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def distance_to_target(self, current, target):
        """Euclidean distance between the current and target (x,y,z) positions."""
        delta = np.array(current[:3]) - np.array(target[:3])
        return math.sqrt(np.dot(delta, delta))

    def avg_rotor_speed(self, rotor_speeds):
        """Deviation of the four rotor speeds from their mean."""
        deviation = np.array(rotor_speeds) - np.average(rotor_speeds)
        return math.sqrt(np.dot(deviation, deviation))

    def get_reward(self, rotor_speeds, state):
        """Uses current pose of sim to return reward."""
        # Earlier attempts, kept for reference:
        # distance_delta = self.distance_to_target(self.sim.pose, self.target_pos)
        # angular_v_delta = self.distance_to_target(self.sim.angular_v, [0, 0, 0])
        # speed_delta = self.avg_rotor_speed(rotor_speeds)
        # bonus = .5 if self.runtime - self.sim.time == 0 else 0
        # reward = 1.0 - 0.5 * distance_delta - 0.2 * angular_v_delta - 0.005 * speed_delta + bonus

        # Inverse vertical distance to the target, flat near the target
        z_error = abs(state[2] - self.target_pos[2])
        if z_error <= 2:
            reward = 5
        else:
            reward = 10. / z_error  # z_error > 2 here, so no division by zero

        ## penalize crashing
        if self.sim.pose[2] == 0:
            reward = -10
        return reward

    def step(self, rotor_speeds, state):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward(rotor_speeds, state)
            pose_all.append(self.sim.pose)  # new position
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
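# A quick check (illustrative, not part of the task) that distance_to_target
# matches numpy's built-in norm, which is handy when refactoring reward terms.
import numpy as np

pose = np.array([1., 2., 2., 0., 0., 0.])  # hypothetical pose: x, y, z + Euler angles
target = np.array([0., 0., 0.])
print(np.linalg.norm(pose[:3] - target))   # 3.0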
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None, init_angle_velocities=None,
                 runtime=5., target_pos=None, task_name='vanilla'):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
            task_name: suffix selecting one of the get_reward_* variants
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_size = 4
        self.action_low = 300
        self.action_high = 1000
        self.runtime = runtime

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])
        self.task_name = task_name
        self.ep_reward = 0
        self.pos_reach_threshold = 0.5

    def get_reward(self, pose_rep=None):
        # Dispatch to the reward variant named by task_name (e.g. 'vanilla', 'takeoff')
        return getattr(self, "get_reward_" + self.task_name)(pose_rep)

    def get_reward_vanilla(self, pose_rep):
        """Uses current pose of sim to return reward."""
        reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    def get_reward_takeoff(self, pose_rep):
        # weights for x, y, z dims ---> z more important for takeoff
        xyz_w = [.3, .3, .4]
        time_reward = 1
        dir_reward = np.dot(xyz_w, self.get_direction_reward(pose_rep))
        pos_reward = np.dot(xyz_w, self.get_pos_reward(pose_rep))
        # weights of direction / position / time
        step_weights = [.15, .25, .6]
        step_reward = np.tanh(np.dot(step_weights, [dir_reward, pos_reward, time_reward]))

        terminal_reward = 0
        if self.sim.done and self.sim.time < self.runtime:
            terminal_reward = -10   # crash
        elif self.get_task_success(pose_rep):
            terminal_reward = 10    # target position reached
        # print("reward:{} dir:{} pos:{}".format(step_reward, dir_reward, pos_reward))
        return step_reward + terminal_reward

    def get_direction_reward(self, pose_rep):
        pos_log = np.array(pose_rep)[:, :3]
        diff_init = np.abs(np.array(pos_log - self.target_pos))
        mrate = np.diff(diff_init, axis=0)  # per-step change in distance to target
        dir_reward = -np.average(mrate, axis=0, weights=[.3, .7])  # moving closer -> positive
        return dir_reward

    def get_pos_reward(self, pose_rep):
        pos_diff = self.get_pos_diff(pose_rep[self.action_repeat - 1])
        pos_reward = np.array([-pd if pd > self.pos_reach_threshold else 10 for pd in pos_diff])
        return pos_reward

    def get_pos_diff(self, pose):
        return np.abs(pose[:3] - self.target_pos)

    def get_task_success(self, pose_rep):
        pos_diff = self.get_pos_diff(pose_rep[self.action_repeat - 1])
        pos_r = np.array([0 if pd > self.pos_reach_threshold else 1 for pd in pos_diff])
        return np.sum(pos_r) == 3

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            pose_all.append(self.sim.pose)
        reward = self.get_reward(pose_all)
        self.ep_reward += reward
        # done if target position is reached
        done = done or self.get_task_success(pose_all)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.ep_reward = 0
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
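# Illustrative usage (hypothetical rotor speeds; assumes PhysicsSim is importable)
# of the task_name dispatch above: one Task class serves several reward
# definitions without subclassing.
import numpy as np

task = Task(target_pos=np.array([0., 0., 10.]), task_name='takeoff')
state = task.reset()
rotor_speeds = [500., 500., 500., 500.]  # mid-range RPMs within [300, 1000]
next_state, reward, done = task.step(rotor_speeds)
print(reward, done)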
import itertools  # needed for the pairwise rotor comparisons in get_reward


class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    # --------------------------------------------------------------------------
    def __init__(self, init_pose=None, init_velocities=None, init_angle_velocities=None,
                 runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])
        self.init_pose = init_pose

        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.count = 1

    # --------------------------------------------------------------------------
    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # Sum of absolute pairwise differences between the four prop wind speeds
        differences = 0
        for i, j in itertools.combinations(range(4), 2):
            differences -= abs(self.sim.prop_wind_speed[i] - self.sim.prop_wind_speed[j])

        reward = (1 - .3 * abs(self.target_pos - self.sim.pose[:3]).sum()
                  - .2 * np.tanh(abs(self.sim.pose[4])))
        # Earlier attempts, kept for reference:
        # reward -= .1 * np.tanh(differences)
        # if done and self.sim.pose[2] <= self.init_pose[2]: reward += -1000
        # reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum() - 6 * (1 / self.count)
        # reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        return reward

    # --------------------------------------------------------------------------
    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        self.count += 1
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    # --------------------------------------------------------------------------
    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.count = 1
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
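# Small illustration (hypothetical speeds, not part of the task) of the
# pairwise-difference term above: combinations(range(4), 2) yields the six
# unordered rotor pairs.
import itertools

speeds = [400., 410., 395., 405.]
pairs = list(itertools.combinations(range(4), 2))
print(pairs)  # [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]
print(sum(abs(speeds[i] - speeds[j]) for i, j in pairs))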
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None, init_angle_velocities=None,
                 runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        reward = 0
        penalty = 0
        current_position = self.sim.pose[:3]

        # penalty for Euler angles; we want the takeoff to be stable
        penalty += abs(self.sim.pose[3:6]).sum()

        # penalty for squared distance from target, weighting z most heavily
        penalty += abs(current_position[0] - self.target_pos[0])**2
        penalty += abs(current_position[1] - self.target_pos[1])**2
        penalty += 10 * abs(current_position[2] - self.target_pos[2])**2

        # link velocity to residual distance
        penalty += abs(abs(current_position - self.target_pos).sum() - abs(self.sim.v).sum())

        distance = np.sqrt((current_position[0] - self.target_pos[0])**2
                           + (current_position[1] - self.target_pos[1])**2
                           + (current_position[2] - self.target_pos[2])**2)
        # extra reward for flying near the target
        if distance < 10:
            reward += 1000
        # constant reward for flying
        reward += 100
        return reward - penalty * 0.0002

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None, init_angle_velocities=None,
                 runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * self.get_sim_state().shape[0]
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4

        # Goal
        self.start_state = self.get_sim_state()
        # self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        def bound_val(val, coef, min_val, max_val):
            return coef * np.clip(val, min_val, max_val)

        # reward_goal = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()  # goal position
        reward_no_roll = -np.power(self.sim.pose[3], 2)           # no roll
        reward_no_pitch = -np.power(self.sim.pose[4], 2)          # no pitch
        reward_no_xy_v = -np.power(self.sim.v[:2], 2).sum() / 30  # no xy velocity
        reward_z_v = self.sim.v[2]                                # climb rate

        reward = (bound_val(reward_no_xy_v, 1, -1, 0)
                  + bound_val(reward_z_v, 1, 0, 10)
                  + bound_val(reward_no_roll, 5, -1, 0)
                  + bound_val(reward_no_pitch, 5, -1, 0))
        return reward

    def get_sim_state(self):
        return np.hstack([self.sim.pose, self.sim.v, self.sim.angular_v])

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.get_sim_state())
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        self.start_state = self.get_sim_state()
        state = np.concatenate([self.get_sim_state()] * self.action_repeat)
        return state
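# Quick illustration (hypothetical inputs) of the bound_val clipping above:
# each shaped term is clipped into a fixed range before weighting, which keeps
# any single term from dominating the reward.
import numpy as np

def bound_val(val, coef, min_val, max_val):
    return coef * np.clip(val, min_val, max_val)

print(bound_val(-3.7, 5, -1, 0))  # -5.0: roll penalty saturates at its floor
print(bound_val(12.4, 1, 0, 10))  # 10.0: climb-rate reward caps at 10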
class Takeoff():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None, init_angle_velocities=None,
                 runtime=5., target_pos=None, position_noise=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
            position_noise: std. dev. of Gaussian noise added to the start position on reset
        """
        print("######## Task: Takeoff")
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.runtime = runtime
        self.position_noise = position_noise

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # reward = 1. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum()
        reward = 0.0

        # encourage acceleration in the +z direction
        # reward += self.sim.linear_accel[2]
        # reward += 0.5 * self.sim.linear_accel[2] * self.sim.dt**2

        # encourage velocity in the +z direction
        if self.sim.v[2] > 0:
            reward = 10 * self.sim.v[2]
        else:
            reward = -10

        # encourage smaller position errors, keeping the denominator away from zero
        # posDiff = abs(self.sim.pose[2] - self.target_pos[2])
        # if posDiff > 0.1:
        #     reward += 10 / posDiff
        # else:
        #     reward += 100
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            reward += self.get_reward()
            pose_all.append(self.sim.pose)

        # sparse reward at the end if the target is reached
        if self.sim.pose[2] >= self.target_pos[2]:
            print("\n successful takeoff to target!")
            reward += 10
            done = True
        # other termination conditions tried:
        # if np.isclose(self.target_pos[2], self.sim.pose[2], 1): reward += 100; done = True
        # if np.allclose(self.sim.pose[:-3], self.target_pos, rtol=1): reward += 100; done = True
        next_state = np.concatenate(pose_all)
        return next_state, reward, done

    def reset(self):
        """Reset the sim with noise to start a new episode."""
        self.sim.reset()
        # ::os:: add a random start position, as suggested by some papers,
        # to make the agent generalize better
        if self.position_noise is not None and self.position_noise > 0:
            rand_pose = np.copy(self.sim.init_pose)
            rand_pose[:3] += np.random.normal(0.0, self.position_noise, 3)
            self.sim.pose = np.copy(rand_pose)
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state
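# Illustrative only: sampling a few noisy start positions the way Takeoff.reset
# does, assuming a hypothetical init_pose and a position_noise of 0.5 m.
import numpy as np

init_pose = np.array([0., 0., 0., 0., 0., 0.])
position_noise = 0.5
for _ in range(3):
    rand_pose = np.copy(init_pose)
    rand_pose[:3] += np.random.normal(0.0, position_noise, 3)
    print(rand_pose[:3])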
class Task():
    """Task (environment) that defines the goal and provides feedback to the agent."""

    def __init__(self, init_pose=None, init_velocities=None, init_angle_velocities=None,
                 runtime=5., target_pos=None):
        """Initialize a Task object.
        Params
        ======
            init_pose: initial position of the quadcopter in (x,y,z) dimensions and the Euler angles
            init_velocities: initial velocity of the quadcopter in (x,y,z) dimensions
            init_angle_velocities: initial radians/second for each of the three Euler angles
            runtime: time limit for each episode
            target_pos: target/goal (x,y,z) position for the agent
        """
        # Simulation
        self.sim = PhysicsSim(init_pose, init_velocities, init_angle_velocities, runtime)
        self.action_repeat = 3

        self.state_size = self.action_repeat * 6
        self.action_low = 0
        self.action_high = 900
        self.action_size = 4
        self.is_done = 0

        # Goal
        self.target_pos = target_pos if target_pos is not None else np.array([0., 0., 10.])

    def get_reward(self):
        """Uses current pose of sim to return reward."""
        # scale the reward by proximity to the target height; penalize a crash
        z_error = abs(self.sim.pose[2] - self.target_pos[2])
        if z_error == 0:
            factor = 100000000  # exactly on target (also avoids dividing by zero)
        elif self.is_done and self.sim.time < self.sim.runtime:
            factor = -100       # episode ended before the time limit: crash penalty
        else:
            factor = 1 / z_error
        reward = factor + (100. - .3 * (abs(self.sim.pose[:3] - self.target_pos)).sum())
        # print("\rposition =", self.sim.pose[:3], ", reward =", reward)
        return reward

    def step(self, rotor_speeds):
        """Uses action to obtain next state, reward, done."""
        reward = 0
        pose_all = []
        for _ in range(self.action_repeat):
            done = self.sim.next_timestep(rotor_speeds)  # update the sim pose and velocities
            self.is_done = done
            reward += self.get_reward()
            pose_all.append(self.sim.pose)
        next_state = np.concatenate(pose_all)
        # note: unlike the other tasks, step also returns the final pose
        return next_state, reward, done, self.sim.pose

    def reset(self):
        """Reset the sim to start a new episode."""
        self.sim.reset()
        state = np.concatenate([self.sim.pose] * self.action_repeat)
        return state