def attitude_limit(self):
    """
    Limit the attitude: clamp roll and pitch to +/-85 deg and yaw to +/-175 deg.
    :return: (att_overlimit, attitude_limited) -- flag and the (possibly clamped) quaternion
    """
    att_overlimit = False
    euler = quat2euler(self.state[6:10])
    r = euler[0]
    p = euler[1]
    y = euler[2]
    if np.abs(r) >= deg2rad(85):
        attitude_limited = euler2quat(
            np.array([np.sign(r) * deg2rad(85), p, y]))
        att_overlimit = True
    if np.abs(p) >= deg2rad(85):
        attitude_limited = euler2quat(
            np.array([r, np.sign(p) * deg2rad(85), y]))
        att_overlimit = True
    if np.abs(y) >= deg2rad(175):
        attitude_limited = euler2quat(
            np.array([r, p, np.sign(y) * deg2rad(175)]))
        att_overlimit = True
    if np.abs(r) <= deg2rad(85) and np.abs(p) <= deg2rad(85) and np.abs(y) <= deg2rad(175):
        attitude_limited = self.state[6:10]
        att_overlimit = False
    return att_overlimit, attitude_limited
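# Hypothetical usage of attitude_limit inside the state update (a minimal
# sketch; writing the clamped quaternion back is an assumption -- only the
# state layout state[6:10] = quaternion comes from the code above):
over_limit, q_limited = self.attitude_limit()
if over_limit:
    self.state[6:10] = q_limited  # replace the attitude with the clamped quaternion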
def __init__(self):
    self.drone = Drone()
    self.state = np.zeros(13)
    self.steps_beyond_done = None
    self.pos_threshold = 1
    self.vel_threshold = 0.1

    ini_pos = np.array([0.0, 0.0, 5.0]) + np.random.uniform(-1, 1, (3,))
    ini_att = euler2quat(
        np.array([deg2rad(0), deg2rad(0), 0]) + np.random.uniform(-0.2, 0.2, (3,)))
    ini_angular_rate = np.array([0, deg2rad(0), 0])
    self.ini_state = np.zeros(13)
    self.ini_state[0:3] = ini_pos
    self.ini_state[6:10] = ini_att
    self.ini_state[10:] = ini_angular_rate

    pos_des = np.array([0.0, 0.0, 5.0])  # [x, y, z]
    att_des = euler2quat(np.array([deg2rad(0), deg2rad(0), deg2rad(0)]))
    self.state_des = np.zeros(13)
    self.state_des[0:3] = pos_des
    self.state_des[6:10] = att_des

    low = self.drone.state_lim_low
    high = self.drone.state_lim_high
    self.action_space = spaces.Box(low=np.array([0.0, 0.0, 0.0, 0.0]),
                                   high=np.array([1.0, 1.0, 1.0, 1.0]))
    self.observation_space = spaces.Box(low=low, high=high)
    self.action_max = np.array([1.0, 1.0, 1.0, 1.0]) * self.drone.mass * self.drone.gravity

    self.seed()
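# Rough hover-thrust check for the action scaling above, assuming the
# environment multiplies the normalized action in [0, 1] by action_max per
# rotor (that scaling is an assumption; mass and gravity values are examples):
import numpy as np
mass, gravity = 0.18, 9.81
action_max = np.ones(4) * mass * gravity          # thrust per rotor at action = 1.0
hover_action = np.full(4, 0.25)                   # each rotor carries a quarter of the weight
total_thrust = np.sum(hover_action * action_max)  # == mass * gravity, so the drone hovers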
def __init__(self):
    self.chaser = Drone()
    self.target = Drone()
    self.target_controller = controller(self.target.get_arm_length(),
                                        self.target.get_mass())
    self.state_chaser = np.zeros(13)
    self.state_target = np.zeros(13)
    self.rel_state = np.zeros(12)
    self.t = 0
    self.done = False
    self.reward = 0.0
    self.shaping = 0.0
    self.last_shaping = 0.0
    # self.steps_beyond_done = None

    # Chaser initial state (position randomized around [8, -50, 5])
    chaser_ini_pos = np.array([8, -50, 5]) + np.random.uniform(-0.3, 0.3, (3,))
    chaser_ini_vel = np.array([0, 0, 0])  # + np.random.uniform(-0.1, 0.1, (3,))
    chaser_ini_att = euler2quat(np.array([0.0, 0.0, 0.0]))  # + np.random.uniform(-0.2, 0.2, (3,))
    chaser_ini_angular_rate = np.array([0.0, 0.0, 0.0])  # + np.random.uniform(-0.1, 0.1, (3,))
    self.chaser_dock_port = np.array([0.1, 0.0, 0.0])
    self.chaser_ini_state = np.zeros(13)
    self.chaser_ini_state[0:3] = chaser_ini_pos
    self.chaser_ini_state[3:6] = chaser_ini_vel
    self.chaser_ini_state[6:10] = chaser_ini_att
    self.chaser_ini_state[10:] = chaser_ini_angular_rate
    self.state_chaser = self.chaser.reset(self.chaser_ini_state,
                                          self.chaser_dock_port)

    # Target initial state
    target_ini_pos = np.array([10, -50, 5])
    target_ini_vel = np.array([0.0, 0.0, 0.0])
    target_ini_att = euler2quat(np.array([0.0, 0.0, 0.0]))
    target_ini_angular_rate = np.array([0.0, 0.0, 0.0])
    self.target_dock_port = np.array([-0.1, 0, 0])
    self.target_ini_state = np.zeros(13)
    self.target_ini_state[0:3] = target_ini_pos
    self.target_ini_state[3:6] = target_ini_vel
    self.target_ini_state[6:10] = target_ini_att
    self.target_ini_state[10:] = target_ini_angular_rate
    self.state_target = self.target.reset(self.target_ini_state,
                                          self.target_dock_port)

    # Target desired (final) state
    target_pos_des = np.array([10, -50, 5])  # [x, y, z]
    target_att_des = euler2quat(np.array([0.0, 0.0, 0.0]))
    self.target_state_des = np.zeros(13)
    self.target_state_des[0:3] = target_pos_des
    self.target_state_des[6:10] = target_att_des

    # Final relative-error thresholds
    self.rel_pos_threshold = 1
    self.rel_vel_threshold = 0.1
    self.rel_att_threshold = np.array([deg2rad(0), deg2rad(0), deg2rad(0)])
    self.rel_att_rate_threshold = np.array([deg2rad(0), deg2rad(0), deg2rad(0)])

    # Relative state between the two dock ports
    self.rel_state = state2rel(self.state_chaser, self.state_target,
                               self.chaser.get_dock_port_state(),
                               self.target.get_dock_port_state())

    # State limits
    chaser_low = self.chaser.state_lim_low
    chaser_high = self.chaser.state_lim_high
    target_low = self.target.state_lim_low
    target_high = self.target.state_lim_high

    # Observation: 12x1 relative info [rel_pos, rel_vel, rel_rpy, rel_rpy_rate]
    self.obs_low = np.array([-np.inf, -np.inf, -np.inf,
                             -100, -100, -100,
                             -np.pi, -np.pi / 2, -np.pi,
                             -10 * np.pi, -10 * np.pi, -10 * np.pi])
    self.obs_high = np.array([np.inf, np.inf, np.inf,
                              100, 100, 100,
                              np.pi, np.pi / 2, np.pi,
                              10 * np.pi, 10 * np.pi, 10 * np.pi])

    self.action_space = spaces.Box(low=np.array([-1.0, -1.0, -1.0, -1.0]),
                                   high=np.array([1.0, 1.0, 1.0, 1.0]),
                                   dtype=np.float32)
    self.observation_space = spaces.Box(low=self.obs_low, high=self.obs_high,
                                        dtype=np.float32)

    # Normalized action in [-1, 1] maps to per-rotor thrust in [0, mass * gravity]
    self.action_mean = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity / 2.0
    self.action_std = np.array([1.0, 1.0, 1.0, 1.0]) * self.chaser.mass * self.chaser.gravity / 2.0

    self.seed()
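# Sketch of the action normalization implied by action_mean and action_std
# above: a policy output a in [-1, 1] maps to a per-rotor thrust in
# [0, mass * gravity]. Mass and gravity values here are examples only.
import numpy as np
mass, gravity = 0.18, 9.81
action_mean = np.ones(4) * mass * gravity / 2.0
action_std = np.ones(4) * mass * gravity / 2.0
a = np.array([-1.0, 0.0, 0.5, 1.0])    # normalized rotor commands
thrust = action_std * a + action_mean  # -> [0, 0.5, 0.75, 1.0] * mass * gravity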
def step(self, action):
    self.t += 1
    old_state_target = self.state_target
    old_state_chaser = self.state_chaser
    old_rel_state = self.rel_state
    old_chaser_dp = self.chaser.get_dock_port_state()

    # Map the normalized action in [-1, 1] to per-rotor thrusts, then to control inputs
    action_chaser = self.chaser.rotor2control @ (self.action_std * action[:] + self.action_mean)
    # The target tracks its desired state with a PID controller
    action_target = self.target_controller.PID(self.target_state_des, self.state_target)

    self.state_target = self.target.step(action_target)
    self.state_chaser = self.chaser.step(action_chaser)

    # Relative state between the two dock ports
    chaser_dp = self.chaser.get_dock_port_state()  # drone A
    target_dp = self.target.get_dock_port_state()  # drone B
    self.rel_state = state2rel(self.state_chaser, self.state_target,
                               chaser_dp, target_dp)

    # Docking succeeds when relative position, velocity, and attitude errors are all small
    flag_docking = bool((np.linalg.norm(self.rel_state[0:3], 2) < 0.1)
                        and (np.linalg.norm(self.rel_state[3:6], 2) < 0.1)
                        and (np.abs(self.rel_state[6]) < deg2rad(10))
                        and (np.abs(self.rel_state[7]) < deg2rad(10))
                        and (np.abs(self.rel_state[8]) < deg2rad(10)))

    # Terminate when the chaser drifts too far from the target or nearly hits the ground
    done_overlimit = bool((np.linalg.norm(self.rel_state[0:3]) >= 3)
                          or self.state_chaser[2] <= 0.1)
    done_overtime = bool(self.t >= 600)
    self.done = bool(done_overlimit or done_overtime)

    reward_docked = 0
    if flag_docking:
        reward_docked = +1.0
    reward_action = np.linalg.norm(action[:], 2)

    # Potential-based shaping; the step reward is the change in shaping
    self.shaping = - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[0:3] / 3.0))) \
                   - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[3:6]))) \
                   - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[6:9] / np.pi))) \
                   - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[9:]))) \
                   - 0.1 * reward_action + 1.0 * reward_docked
    self.reward = self.shaping - self.last_shaping
    self.last_shaping = self.shaping

    # Large terminal penalty when the episode ends by violating the limits
    if self.done:
        self.reward = -1000.0

    info = {'chaser': self.state_chaser,
            'target': self.state_target,
            'flag_docking': flag_docking,
            'done_overlimit': done_overlimit}

    return self.rel_state, self.reward, self.done, info
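# Minimal rollout sketch for this environment. The gym id
# 'gym_docking:docking-v0' is an assumption (only 'hovering-v0' appears
# elsewhere in this repo); random actions are used for brevity.
import gym

env = gym.make('gym_docking:docking-v0')
obs = env.reset()
for _ in range(600):
    action = env.action_space.sample()           # normalized rotor commands in [-1, 1]
    obs, reward, done, info = env.step(action)   # obs is the 12-D relative state
    if done:
        break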
from dynamics.quadrotor import Drone
from controller.PIDController import controller
from utils.transform import quat2rot, rot2euler, euler2rot, rot2quat, rad2deg, deg2rad, quat2euler, euler2quat
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# print(rot2quat(euler2rot(np.array([0, 0, 0]))))

# Initial state
ini_pos = np.array([8, -50, 5])
ini_vel = np.array([0, 0, 0])
ini_att = euler2quat(np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)]))
ini_angular_rate = np.array([0, 0, 0])
ini_state = np.zeros(13)
ini_state[0:3] = ini_pos
ini_state[3:6] = ini_vel
ini_state[6:10] = ini_att
ini_state[10:] = ini_angular_rate

# Desired state
pos_des = np.array([10, -50, 5])  # [x, y, z]
vel_des = np.array([0, 0, 0])
att_des = euler2quat(np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)]))
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[3:6] = vel_des
state_des[6:10] = att_des

# Initialize a drone and set its initial state
quad1 = Drone()
quad1.reset(ini_state)
control = controller(quad1.get_arm_length(), quad1.get_mass())
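# A minimal closed-loop sketch of how this setup can be driven, following the
# same PID-then-step pattern used in the docking environment; the number of
# steps and the trajectory logging are assumptions.
states = []
state = ini_state
for _ in range(1000):
    action = control.PID(state_des, state)  # control command toward the desired state
    state = quad1.step(action)              # propagate the quadrotor one time step
    states.append(state.copy())
states = np.array(states)                   # (N, 13) trajectory, e.g. for plotting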
def step(self, action):
    self.t += 1
    old_state_target = self.info['target']  # self.state_target
    old_state_chaser = self.state_chaser
    old_rel_state = self.rel_state
    old_chaser_dp = self.chaser.get_dock_port_state()

    action_chaser = self.chaser.rotor2control @ (
        self.action_std * action[:] + self.action_mean)
    # action_chaser = self.chaser.rotor2control @ (self.action_max * action[:])
    action_target = self.target_controller.vel_controller(
        self.target_state_des, self.state_target, old_state_target)

    self.state_target = self.target.step(action_target)
    self.state_chaser = self.chaser.step(action_chaser)

    # dock port relative state
    chaser_dp = self.chaser.get_dock_port_state()  # drone A
    target_dp = self.target.get_dock_port_state()  # drone B
    self.rel_state = state2rel(self.state_chaser, self.state_target,
                               chaser_dp, target_dp)

    flag_docking = bool((np.linalg.norm(self.rel_state[0:3], 2) < 0.1)
                        and (np.linalg.norm(self.rel_state[3:6], 2) < 0.1)
                        and (np.abs(self.rel_state[6]) < deg2rad(10))
                        and (np.abs(self.rel_state[7]) < deg2rad(10))
                        and (np.abs(self.rel_state[8]) < deg2rad(10)))

    done_overlimit = bool((np.linalg.norm(self.rel_state[0:3]) >= 10)
                          or self.state_chaser[2] <= 0.1)
    done_overtime = bool(self.t >= 600)
    self.done = bool(done_overlimit or done_overtime)

    reward_docked = 0
    if flag_docking:
        reward_docked = +1.0
    reward_action = np.linalg.norm(action[:], 2)
    # reward_action = np.sum(np.abs(action[:]))

    self.shaping = - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[0:3] / 10))) \
                   - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[3:6]))) \
                   - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[6:9] / np.pi))) \
                   - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[9:]))) \
                   - 0.1 * reward_action + 1.0 * reward_docked
    self.reward = self.shaping - self.last_shaping
    self.last_shaping = self.shaping

    self.info = {
        'chaser': self.state_chaser,
        'target': self.state_target,
        'flag_docking': flag_docking,
        'done_overlimit': done_overlimit
    }

    return self.rel_state, self.reward, self.done, self.info
from dynamics.quadrotor import Drone
from controller.PIDController import controller
from utils.transform import quat2rot, rot2euler, euler2rot, rot2quat, rad2deg, deg2rad, quat2euler, euler2quat
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from controller.JoystickController import RCInput
import threading

# print(rot2quat(euler2rot(np.array([0, 0, 0]))))

# Initial state
ini_pos = np.array([0, 0, 0])
ini_att = euler2quat(np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)]))
ini_angular_rate = np.array([0, 0, deg2rad(0)])
ini_state = np.zeros(13)
ini_state[0:3] = ini_pos
ini_state[6:10] = ini_att
ini_state[10:] = ini_angular_rate

# Desired state
pos_des = np.array([0, 0, 0])  # [x, y, z]
att_des = np.array([deg2rad(0.0), deg2rad(0.0), deg2rad(0.0)])
angular_rate_des = np.array([0, 0, deg2rad(0)])
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[6:10] = euler2quat(att_des)
state_des[10:] = angular_rate_des

# Initialize a drone and set its initial state
quad1 = Drone()
quad1.reset(ini_state)
control = controller(quad1.get_arm_length(), quad1.get_mass())
def step(self, action):
    self.t += 1
    old_state_target = self.state_target
    old_state_chaser = self.state_chaser
    old_rel_state = self.rel_state
    old_chaser_dp = self.chaser.get_dock_port_state()

    action_chaser = self.chaser.rotor2control @ (self.action_std * action[:] + self.action_mean)
    action_target = self.target_controller.PID(self.target_state_des, self.state_target)

    self.state_target = self.target.step(action_target)
    self.state_chaser = self.chaser.step(action_chaser)

    # Publish the current chaser and target states
    self.chaser_pub_srv.send_state(int(self.t), self.state_chaser)
    self.target_pub_srv.send_state(int(self.t), self.state_target)

    # Grab the rendered screen and downsample it as the image observation
    img = ImageGrab.grab([0, 0, 1920, 1080])
    # img.convert('L')
    # time.sleep(0.1)
    resize_img = img.resize((320, 240), Image.ANTIALIAS)
    self.obs = np.array(resize_img)

    # dock port relative state
    chaser_dp = self.chaser.get_dock_port_state()  # drone A
    target_dp = self.target.get_dock_port_state()  # drone B
    self.rel_state = state2rel(self.state_chaser, self.state_target,
                               chaser_dp, target_dp)

    flag_docking = bool((np.linalg.norm(self.rel_state[0:3], 2) < 0.1)
                        and (np.linalg.norm(self.rel_state[3:6], 2) < 0.1)
                        and (np.abs(self.rel_state[6]) < deg2rad(10))
                        and (np.abs(self.rel_state[7]) < deg2rad(10))
                        and (np.abs(self.rel_state[8]) < deg2rad(10)))
    done_overlimit = bool((np.linalg.norm(self.rel_state[0:3]) >= 3)
                          or self.state_chaser[2] <= 0.1)
    done_overtime = bool(self.t >= 600)
    self.done = bool(done_overlimit or done_overtime)

    reward_docked = 0
    if flag_docking:
        reward_docked = +1.0
    reward_action = np.linalg.norm(action[:], 2)

    self.shaping = - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[0:3] / 3.0))) \
                   - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[3:6]))) \
                   - 10.0 * np.sqrt(np.sum(np.square(self.rel_state[6:9] / np.pi))) \
                   - 1.0 * np.sqrt(np.sum(np.square(self.rel_state[9:]))) \
                   - 0.1 * reward_action + 1.0 * reward_docked
    self.reward = self.shaping - self.last_shaping
    self.last_shaping = self.shaping

    info = {'chaser': self.state_chaser,
            'target': self.state_target,
            'flag_docking': flag_docking,
            'done_overlimit': done_overlimit}

    return self.obs, self.reward, self.done, info
from dynamics.quadrotor import Drone
from controller.PIDController import controller
from utils.transform import quat2rot, rot2euler, euler2rot, rot2quat, rad2deg, deg2rad
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# print(rot2quat(euler2rot(np.array([0, 0, 0]))))

# Initial states for the two quadrotors
ini_pos = np.array([0, 0, 10])
ini_att = rot2quat(euler2rot(np.array([deg2rad(0), deg2rad(0), 0])))
ini_angular_rate = np.array([0, deg2rad(0), 0])
ini_state_qd1 = np.zeros(13)
ini_state_qd1[0:3] = ini_pos
ini_state_qd1[6:10] = ini_att
ini_state_qd1[10:] = ini_angular_rate
ini_state_qd2 = np.zeros(13)
ini_state_qd2[0:3] = ini_pos
ini_state_qd2[6:10] = ini_att
ini_state_qd2[10:] = ini_angular_rate

# Desired state
att_des = rot2quat(euler2rot(np.array([deg2rad(0), deg2rad(0), deg2rad(0)])))
pos_des = np.array([0.1, 0, 8.2])  # [x, y, z]
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[6:10] = att_des

# Initialize a drone and set its initial state
quad1 = Drone()
quad1.reset(ini_state_qd1)
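# The second initial state above suggests a second vehicle; a hedged sketch of
# how it could be set up and driven (quad2, the controller instance, and the
# loop length are assumptions following the pattern of the other scripts):
quad2 = Drone()
quad2.reset(ini_state_qd2)
control = controller(quad1.get_arm_length(), quad1.get_mass())
state_qd1 = ini_state_qd1
for _ in range(500):
    state_qd1 = quad1.step(control.PID(state_des, state_qd1))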
import gym
from gym import wrappers, logger
import numpy as np
from controller.PIDController import controller
from utils.transform import quat2rot, rot2euler, euler2rot, rot2quat, rad2deg, deg2rad, euler2quat

att_des = euler2quat(np.array([deg2rad(0), deg2rad(0), deg2rad(0)]))
pos_des = np.array([0, 0, 1])  # [x, y, z]
state_des = np.zeros(13)
state_des[0:3] = pos_des
state_des[6:10] = att_des

control = controller(0.086, 0.18)

env = gym.make('gym_docking:hovering-v0')
obs = env.reset()

for i in range(1000):
    env.seed(0)
    action = control.PID(state_des, obs)
    obs, reward, dones, info = env.step(action)
    # print('obs: ', obs)
    print('reward: ', reward)
    print('dones: ', dones)