# Assumed imports for this script: numpy plus the project's own Drone,
# controller, euler2quat and deg2rad helpers (module paths are not shown in
# this excerpt).
import numpy as np

# ini_pos, ini_vel, ini_att and ini_angular_rate are assumed to be defined
# earlier in the original script.
ini_state = np.zeros(13)
ini_state[0:3] = ini_pos
ini_state[3:6] = ini_vel
ini_state[6:10] = ini_att
ini_state[10:] = ini_angular_rate

# Desired state
pos_des = np.array([10, -50, 5])  # [x, y, z]
vel_des = np.array([0, 0, 0])
att_des = euler2quat(np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)]))
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[3:6] = vel_des
state_des[6:10] = att_des

# Initialize a drone and set its initial state
quad1 = Drone()
quad1.reset(ini_state)
control = controller(quad1.get_arm_length(), quad1.get_mass())

# Control command
u = np.zeros(quad1.dim_u)
# u[0] = quad1.get_mass() * 9.81
# u[3] = 0.2

# Preallocate logging buffers for the simulation
total_step = 1500
state = np.zeros((total_step, 13))
state_des_all = np.zeros((total_step, 13))
rpy = np.zeros((total_step, 3))
time = np.zeros(total_step)
u_all = np.zeros((total_step, 4))
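# --- Example propagation loop (sketch) ---------------------------------------
# A minimal sketch of how the buffers above could be filled. It assumes
# control.PID(state_des, state) returns the 4-dim control command (mirroring
# how the target drone is driven in VideoDockingEnv below), that Drone.step(u)
# returns the new 13-dim state, and a fixed integration step dt; dt and the
# quat2euler conversion used for logging are assumptions, not part of the
# original script.
dt = 0.01  # assumed integration step
cur_state = quad1.reset(ini_state)                # reset returns the state, as in HoveringEnv.reset
for i in range(total_step):
    u = control.PID(state_des, cur_state)         # PID command toward state_des
    cur_state = quad1.step(u)                     # propagate the drone one step
    state[i, :] = cur_state                       # log full state
    state_des_all[i, :] = state_des               # log reference
    rpy[i, :] = quat2euler(cur_state[6:10])       # quaternion -> roll/pitch/yaw
    u_all[i, :] = u                               # log control command
    time[i] = i * dt                              # log simulation time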
# Assumed imports for this environment module: gym/numpy plus the project's
# Drone model and the euler2quat, quat2euler, deg2rad helpers (paths not shown
# in this excerpt).
import gym
import numpy as np
from gym import spaces
from gym.utils import seeding


class HoveringEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.drone = Drone()
        self.state = np.zeros(13)
        self.steps_beyond_done = None
        self.pos_threshold = 1
        self.vel_threshold = 0.1

        # Randomized initial state around the hover point [0, 0, 5]
        ini_pos = np.array([0.0, 0.0, 5.0]) + np.random.uniform(-1, 1, (3, ))
        ini_att = euler2quat(
            np.array([deg2rad(0), deg2rad(0), 0]) +
            np.random.uniform(-0.2, 0.2, (3, )))
        ini_angular_rate = np.array([0, deg2rad(0), 0])
        self.ini_state = np.zeros(13)
        self.ini_state[0:3] = ini_pos
        self.ini_state[6:10] = ini_att
        self.ini_state[10:] = ini_angular_rate

        # Desired hover state
        pos_des = np.array([0.0, 0.0, 5.0])  # [x, y, z]
        att_des = euler2quat(np.array([deg2rad(0), deg2rad(0), deg2rad(0)]))
        self.state_des = np.zeros(13)
        self.state_des[0:3] = pos_des
        self.state_des[6:10] = att_des

        # Observation limits come from the drone model
        low = self.drone.state_lim_low
        high = self.drone.state_lim_high
        self.action_space = spaces.Box(low=np.array([0.0, 0.0, 0.0, 0.0]),
                                       high=np.array([1.0, 1.0, 1.0, 1.0]),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high)
        # Normalized actions are scaled so that 1.0 corresponds to one gravity
        # worth of thrust per rotor
        self.action_max = np.array([1.0, 1.0, 1.0, 1.0]) * self.drone.mass * self.drone.gravity
        self.seed()
        # self.reset()

    def step(self, action):
        # Map normalized rotor thrusts to the control vector and propagate
        u = self.drone.rotor2control @ (self.action_max * action[:])
        self.state = self.drone.step(u)

        # Alternative termination on reaching the hover tolerance:
        # done1 = bool(-self.pos_threshold < np.linalg.norm(self.state[0:3] - self.state_des[0:3], 2) < self.pos_threshold
        #              and -self.vel_threshold < np.linalg.norm(self.state[3:6] - self.state_des[3:6], 2) < self.vel_threshold)

        pos_error = self.state_des[0:3] - self.state[0:3]
        vel_error = self.state_des[3:6] - self.state[3:6]
        att_error = quat2euler(self.state_des[6:10]) - quat2euler(self.state[6:10])
        att_vel_error = self.state_des[10:] - self.state[10:]

        # Bonus when the drone is inside the hover tolerance
        r_thre = 0.0
        if np.linalg.norm(pos_error, 2) < 0.1 and np.linalg.norm(vel_error, 2) < 0.1:
            r_thre = 1.0

        # Terminate when position or velocity diverges
        done = bool((np.linalg.norm(self.state[0:3], 2) > 100)
                    or (np.linalg.norm(self.state[3:6], 2) > 100))
        if not done:
            reward = r_thre + 0.1 - 0.01 * np.linalg.norm(pos_error, 2) \
                     - 0.001 * np.linalg.norm(vel_error, 2) \
                     - 0.01 * np.linalg.norm(att_error, 2) \
                     - 0.001 * np.linalg.norm(att_vel_error, 2)
        else:
            reward = -0.1
        # time = self.drone.get_time()
        return self.state, reward, done, {}

    def reset(self):
        return self.drone.reset(self.ini_state)

    def render(self, mode='human'):
        return None

    def close(self):
        return None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
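# --- Example rollout (sketch) --------------------------------------------------
# A minimal sketch of driving HoveringEnv with random actions; the 500-step
# episode length is an arbitrary choice for illustration, not part of the
# environment.
if __name__ == '__main__':
    env = HoveringEnv()
    obs = env.reset()
    total_reward = 0.0
    for _ in range(500):
        action = env.action_space.sample()        # random normalized rotor thrusts
        obs, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            obs = env.reset()
    env.close()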
# Assumed imports for this environment module: gym/numpy/PIL plus the
# project's Drone, controller, srv and the euler2quat, deg2rad, state2rel
# helpers (paths not shown in this excerpt).
import gym
import numpy as np
from gym import spaces
from gym.utils import seeding
from PIL import Image, ImageGrab


class VideoDockingEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        self.chaser = Drone()
        self.target = Drone()
        self.target_controller = controller(self.target.get_arm_length(),
                                            self.target.get_mass())
        self.state_chaser = np.zeros(13)
        self.state_target = np.zeros(13)
        self.rel_state = np.zeros(12)
        self.t = 0
        self.done = False
        self.reward = 0.0
        self.shaping = 0.0
        self.last_shaping = 0.0
        self.obs = np.zeros((240, 320, 3), dtype=np.uint8)
        self.chaser_pub_srv = srv(1)
        self.target_pub_srv = srv(2)
        # self.steps_beyond_done = None

        # Chaser initial state
        chaser_ini_pos = np.array([8, -50, 5])  # + np.random.uniform(-0.5, 0.5, (3,))
        chaser_ini_vel = np.array([0, 0, 0])  # + np.random.uniform(-0.1, 0.1, (3,))
        chaser_ini_att = euler2quat(np.array([0.0, 0.0, 0.0]))  # + np.random.uniform(-0.2, 0.2, (3,))
        chaser_ini_angular_rate = np.array([0.0, 0.0, 0.0])  # + np.random.uniform(-0.1, 0.1, (3,))
        self.chaser_dock_port = np.array([0.1, 0.0, 0.0])
        self.chaser_ini_state = np.zeros(13)
        self.chaser_ini_state[0:3] = chaser_ini_pos
        self.chaser_ini_state[3:6] = chaser_ini_vel
        self.chaser_ini_state[6:10] = chaser_ini_att
        self.chaser_ini_state[10:] = chaser_ini_angular_rate
        self.state_chaser = self.chaser.reset(self.chaser_ini_state,
                                              self.chaser_dock_port)

        # Target initial state
        target_ini_pos = np.array([10, -50, 5])
        target_ini_vel = np.array([0.0, 0.0, 0.0])
        target_ini_att = euler2quat(np.array([0.0, 0.0, 0.0]))
        target_ini_angular_rate = np.array([0.0, 0.0, 0.0])
        self.target_dock_port = np.array([-0.1, 0, 0])
        self.target_ini_state = np.zeros(13)
        self.target_ini_state[0:3] = target_ini_pos
        self.target_ini_state[3:6] = target_ini_vel
        self.target_ini_state[6:10] = target_ini_att
        self.target_ini_state[10:] = target_ini_angular_rate
        self.state_target = self.target.reset(self.target_ini_state,
                                              self.target_dock_port)

        # Target desired (hover) state
        target_pos_des = np.array([10, -50, 5])  # [x, y, z]
        target_att_des = euler2quat(np.array([0.0, 0.0, 0.0]))
        self.target_state_des = np.zeros(13)
        self.target_state_des[0:3] = target_pos_des
        self.target_state_des[6:10] = target_att_des

        # Final relative-error thresholds
        self.rel_pos_threshold = 1
        self.rel_vel_threshold = 0.1
        self.rel_att_threshold = np.array([deg2rad(0), deg2rad(0), deg2rad(0)])
        self.rel_att_rate_threshold = np.array([deg2rad(0), deg2rad(0), deg2rad(0)])

        # Relative state between the chaser (drone A) and target (drone B) dock ports
        self.rel_state = state2rel(self.state_chaser, self.state_target,
                                   self.chaser.get_dock_port_state(),
                                   self.target.get_dock_port_state())

        # State limits
        chaser_low = self.chaser.state_lim_low
        chaser_high = self.chaser.state_lim_high
        target_low = self.target.state_lim_low
        target_high = self.target.state_lim_high

        # Relative info is 12x1: [rel_pos, rel_vel, rel_rpy, rel_rpy_rate]
        self.action_space = spaces.Box(low=np.array([-1.0, -1.0, -1.0, -1.0]),
                                       high=np.array([1.0, 1.0, 1.0, 1.0]),
                                       dtype=np.float32)
        # RGB image observation (240x320x3); grayscale conversion is left
        # commented out in step()
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(240, 320, 3),
                                            dtype=np.uint8)
        # Normalized actions are mapped to rotor thrusts around the hover point
        # self.action_max = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity
        self.action_mean = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity / 2.0
        self.action_std = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity / 2.0
        self.seed()
        # self.reset()

    def step(self, action):
        self.t += 1
        old_state_target = self.state_target
        old_state_chaser = self.state_chaser
        old_rel_state = self.rel_state
        old_chaser_dp = self.chaser.get_dock_port_state()

        # Map the normalized chaser action to rotor thrusts, then to the control vector
        action_chaser = self.chaser.rotor2control @ (self.action_std * action[:] + self.action_mean)
        # action_chaser = self.chaser.rotor2control @ (self.action_max * action[:])
        # The target simply holds its desired hover state under PID control
        action_target = self.target_controller.PID(self.target_state_des,
                                                   self.state_target)
        self.state_target = self.target.step(action_target)
        self.state_chaser = self.chaser.step(action_chaser)

        # Publish both states so the external visualizer can render the scene
        self.chaser_pub_srv.send_state(int(self.t), self.state_chaser)
        self.target_pub_srv.send_state(int(self.t), self.state_target)

        # Grab the rendered frame from the screen and downscale it to the
        # observation size. Note: Image.ANTIALIAS was removed in Pillow >= 10;
        # use Image.LANCZOS on newer Pillow versions.
        img = ImageGrab.grab([0, 0, 1920, 1080])
        # img.convert('L')
        # time.sleep(0.1)
        resize_img = img.resize((320, 240), Image.ANTIALIAS)
        self.obs = np.array(resize_img)

        # Relative state between the two docking ports
        chaser_dp = self.chaser.get_dock_port_state()  # drone A
        target_dp = self.target.get_dock_port_state()  # drone B
        self.rel_state = state2rel(self.state_chaser, self.state_target,
                                   chaser_dp, target_dp)

        # Docking is flagged when relative position, velocity and attitude are
        # all within tolerance
        flag_docking = bool((np.linalg.norm(self.rel_state[0:3], 2) < 0.1)
                            and (np.linalg.norm(self.rel_state[3:6], 2) < 0.1)
                            and (np.abs(self.rel_state[6]) < deg2rad(10))
                            and (np.abs(self.rel_state[7]) < deg2rad(10))
                            and (np.abs(self.rel_state[8]) < deg2rad(10)))
        done_overlimit = bool((np.linalg.norm(self.rel_state[0:3]) >= 3)
                              or self.state_chaser[2] <= 0.1)
        done_overtime = bool(self.t >= 600)
        self.done = bool(done_overlimit or done_overtime)

        reward_docked = 1.0 if flag_docking else 0.0
        reward_action = np.linalg.norm(action[:], 2)

        # Potential-based shaping: the step reward is the change in shaping
        self.shaping = - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[0:3] / 3.0))) \
                       - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[3:6]))) \
                       - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[6:9] / np.pi))) \
                       - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[9:]))) \
                       - 0.1 * reward_action + 1.0 * reward_docked
        self.reward = self.shaping - self.last_shaping
        self.last_shaping = self.shaping
        # reward += 0.1 * self.t

        info = {'chaser': self.state_chaser,
                'target': self.state_target,
                'flag_docking': flag_docking,
                'done_overlimit': done_overlimit}
        return self.obs, self.reward, self.done, info

    def reset(self):
        self.state_chaser = self.chaser.reset(self.chaser_ini_state,
                                              self.chaser_dock_port)
        self.state_target = self.target.reset(self.target_ini_state,
                                              self.target_dock_port)
        chaser_dp = self.chaser.get_dock_port_state()  # drone A
        target_dp = self.target.get_dock_port_state()  # drone B
        self.rel_state = state2rel(self.state_chaser, self.state_target,
                                   chaser_dp, target_dp)
        self.done = False
        self.obs = np.zeros((240, 320, 3), dtype=np.uint8)
        self.t = 0
        self.reward = 0.0
        self.shaping = 0.0
        self.last_shaping = 0.0
        return self.obs

    def render(self, mode='human'):
        return None

    def close(self):
        return None

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
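# --- Example docking rollout (sketch) ------------------------------------------
# A minimal sketch of driving VideoDockingEnv with random actions. It assumes
# the external visualizer behind srv/ImageGrab is already running, since the
# observation is a screen capture; the 600-step cap mirrors done_overtime.
if __name__ == '__main__':
    env = VideoDockingEnv()
    obs = env.reset()                              # (240, 320, 3) uint8 image
    for _ in range(600):
        action = env.action_space.sample()         # normalized thrusts in [-1, 1]
        obs, reward, done, info = env.step(action)
        if info['flag_docking']:
            print('docking tolerance reached at step', env.t)
        if done:
            break
    env.close()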