def __init__(self, sys_argv):
    super(App, self).__init__(sys_argv)
    self.model = Model()
    self.ctrl = Control(self.model)
    self.view = View(self.model, self.ctrl)
    self.view.show()
def main(): start_time = time.time() # Simulation Setup # --------------------------- Ti = 0 Ts = 0.005 Tf = 95 ifsave = 0 # Choose trajectory settings # --------------------------- ctrlOptions = ["xyz_pos", "xy_vel_z_pos", "xyz_vel"] trajSelect = np.zeros(3) # Select Control Type (0: xyz_pos, 1: xy_vel_z_pos, 2: xyz_vel) ctrlType = ctrlOptions[0] # Select Position Trajectory Type (0: hover, 1: pos_waypoint_timed, 2: pos_waypoint_arrived trajSelect[0] = 2 # Select Yaw Trajectory Type (0: none, 1: yaw_waypoint_timed, 2: yaw_waypoint_interp, 3: zero, 4: Follow) trajSelect[1] = 4 # Select if waypoint time is used, or if average speed is used to calculate waypoint time (0: waypoint time, 1: average speed) trajSelect[2] = 0 print("Control type: {}".format(ctrlType)) # Initialize Quadcopter, Controller, Wind, Result Matrixes # --------------------------- quad = Quadcopter(Ti) traj = Trajectory(quad, ctrlType, trajSelect) potfld = PotField(1) ctrl = Control(quad, traj.yawType) wind = Wind('None', 2.0, 90, -15) # Trajectory for First Desired States # --------------------------- traj.desiredState(0, Ts, quad) # First Potential Field Calculation # --------------------------- potfld.isWithinRange(quad) potfld.isWithinField(quad) potfld.rep_force(quad, traj) # Generate First Commands # --------------------------- ctrl.controller(traj, quad, potfld, Ts) # Initialize Result Matrixes # --------------------------- numTimeStep = int(Tf / Ts + 1) t_all = np.zeros(numTimeStep) s_all = np.zeros([numTimeStep, len(quad.state)]) pos_all = np.zeros([numTimeStep, len(quad.pos)]) vel_all = np.zeros([numTimeStep, len(quad.vel)]) quat_all = np.zeros([numTimeStep, len(quad.quat)]) omega_all = np.zeros([numTimeStep, len(quad.omega)]) euler_all = np.zeros([numTimeStep, len(quad.euler)]) sDes_traj_all = np.zeros([numTimeStep, len(traj.sDes)]) sDes_calc_all = np.zeros([numTimeStep, len(ctrl.sDesCalc)]) w_cmd_all = np.zeros([numTimeStep, len(ctrl.w_cmd)]) wMotor_all = np.zeros([numTimeStep, len(quad.wMotor)]) thr_all = np.zeros([numTimeStep, len(quad.thr)]) tor_all = np.zeros([numTimeStep, len(quad.tor)]) notInRange_all = np.zeros([numTimeStep, potfld.num_points], dtype=bool) inRange_all = np.zeros([numTimeStep, potfld.num_points], dtype=bool) inField_all = np.zeros([numTimeStep, potfld.num_points], dtype=bool) minDist_all = np.zeros(numTimeStep) t_all[0] = Ti s_all[0, :] = quad.state pos_all[0, :] = quad.pos vel_all[0, :] = quad.vel quat_all[0, :] = quad.quat omega_all[0, :] = quad.omega euler_all[0, :] = quad.euler sDes_traj_all[0, :] = traj.sDes sDes_calc_all[0, :] = ctrl.sDesCalc w_cmd_all[0, :] = ctrl.w_cmd wMotor_all[0, :] = quad.wMotor thr_all[0, :] = quad.thr tor_all[0, :] = quad.tor notInRange_all[0, :] = potfld.notWithinRange inRange_all[0, :] = potfld.inRangeNotField inField_all[0, :] = potfld.withinField minDist_all[0] = potfld.distanceMin # Run Simulation # --------------------------- t = Ti i = 1 while round(t, 3) < Tf: t = quad_sim(t, Ts, quad, ctrl, wind, traj, potfld) # print("{:.3f}".format(t)) t_all[i] = t s_all[i, :] = quad.state pos_all[i, :] = quad.pos vel_all[i, :] = quad.vel quat_all[i, :] = quad.quat omega_all[i, :] = quad.omega euler_all[i, :] = quad.euler sDes_traj_all[i, :] = traj.sDes sDes_calc_all[i, :] = ctrl.sDesCalc w_cmd_all[i, :] = ctrl.w_cmd wMotor_all[i, :] = quad.wMotor thr_all[i, :] = quad.thr tor_all[i, :] = quad.tor notInRange_all[i, :] = potfld.notWithinRange inRange_all[i, :] = potfld.inRangeNotField inField_all[i, :] = potfld.withinField minDist_all[i] = potfld.distanceMin i += 1 
    end_time = time.time()
    print("Simulated {:.2f}s in {:.6f}s.".format(t, end_time - start_time))

    # View Results
    # ---------------------------
    def figures():
        utils.makeFigures(quad.params, t_all, pos_all, vel_all, quat_all, omega_all,
                          euler_all, w_cmd_all, wMotor_all, thr_all, tor_all,
                          sDes_traj_all, sDes_calc_all, potfld, minDist_all)
        plt.show()

    utils.third_PV_animation(t_all, traj.wps, pos_all, quat_all, euler_all,
                             sDes_traj_all, Ts, quad.params, traj.xyzType,
                             traj.yawType, potfld, notInRange_all, inRange_all,
                             inField_all, ifsave, figures)
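These driver scripts are normally executed directly. A minimal usage sketch with the standard module guard (assuming the usual imports sit at the top of the file):

```python
if __name__ == "__main__":
    main()
```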
def main():
    start_time = time.time()

    # Simulation Setup
    # ---------------------------
    Ti = 0
    Ts = 0.005
    Tf = 18
    ifsave = 1

    # Choose trajectory settings
    # ---------------------------
    ctrlOptions = ["xyz_pos", "xy_vel_z_pos", "xyz_vel"]
    trajSelect = np.zeros(3)

    # Select Control Type (0: xyz_pos, 1: xy_vel_z_pos, 2: xyz_vel)
    ctrlType = ctrlOptions[0]
    # Select Position Trajectory Type (0: hover, 1: pos_waypoint_timed, 2: pos_waypoint_interp,
    #   3: minimum velocity, 4: minimum accel, 5: minimum jerk, 6: minimum snap,
    #   7: minimum accel_stop, 8: minimum jerk_stop, 9: minimum snap_stop,
    #   10: minimum jerk_full_stop, 11: minimum snap_full_stop,
    #   12: pos_waypoint_arrived
    trajSelect[0] = 6
    # Select Yaw Trajectory Type (0: none, 1: yaw_waypoint_timed, 2: yaw_waypoint_interp, 3: follow, 4: zero)
    trajSelect[1] = 6
    # Select if waypoint time is used, or if average speed is used to calculate waypoint time
    # (0: waypoint time, 1: average speed)
    trajSelect[2] = 1
    print("Control type: {}".format(ctrlType))

    # Initialize Quadcopter, Controller, Wind, Result Matrixes
    # ---------------------------
    quad = Quadcopter(Ti)
    traj = Trajectory(quad, ctrlType, trajSelect)
    ctrl = Control(quad, traj.yawType)
    wind = Wind("None", 2.0, 90, -15)

    # Trajectory for First Desired States
    # ---------------------------
    sDes = traj.desiredState(0, Ts, quad)  # np.zeros(19)

    # Generate First Commands
    # ---------------------------
    ctrl.controller(traj, quad, sDes, Ts)

    # Initialize Result Matrixes
    # ---------------------------
    numTimeStep = int(Tf / Ts + 1)
    (
        t_all,
        s_all,
        pos_all,
        vel_all,
        quat_all,
        omega_all,
        euler_all,
        sDes_traj_all,
        sDes_calc_all,
        w_cmd_all,
        wMotor_all,
        thr_all,
        tor_all,
    ) = init_data(quad, traj, ctrl, numTimeStep)

    t_all[0] = Ti
    s_all[0, :] = quad.state
    pos_all[0, :] = quad.pos
    vel_all[0, :] = quad.vel
    quat_all[0, :] = quad.quat
    omega_all[0, :] = quad.omega
    euler_all[0, :] = quad.euler
    sDes_traj_all[0, :] = traj.sDes
    sDes_calc_all[0, :] = ctrl.sDesCalc
    w_cmd_all[0, :] = ctrl.w_cmd
    wMotor_all[0, :] = quad.wMotor
    thr_all[0, :] = quad.thr
    tor_all[0, :] = quad.tor

    # Run Simulation
    # ---------------------------
    t = Ti
    i = 1
    while round(t, 3) < Tf:
        t = quad_sim(t, Ts, quad, ctrl, wind, traj)
        # print("{:.3f}".format(t))
        t_all[i] = t
        s_all[i, :] = quad.state
        pos_all[i, :] = quad.pos
        vel_all[i, :] = quad.vel
        quat_all[i, :] = quad.quat
        omega_all[i, :] = quad.omega
        euler_all[i, :] = quad.euler
        sDes_traj_all[i, :] = traj.sDes
        sDes_calc_all[i, :] = ctrl.sDesCalc
        w_cmd_all[i, :] = ctrl.w_cmd
        wMotor_all[i, :] = quad.wMotor
        thr_all[i, :] = quad.thr
        tor_all[i, :] = quad.tor
        i += 1

    end_time = time.time()
    print("Simulated {:.2f}s in {:.6f}s.".format(t, end_time - start_time))

    # View Results
    # ---------------------------
    # utils.fullprint(sDes_traj_all[:,3:6])
    utils.makeFigures(
        quad.params, t_all, pos_all, vel_all, quat_all, omega_all, euler_all,
        w_cmd_all, wMotor_all, thr_all, tor_all, sDes_traj_all, sDes_calc_all,
    )
    ani = utils.sameAxisAnimation(
        t_all, traj.wps, pos_all, quat_all, sDes_traj_all, Ts, quad.params,
        traj.xyzType, traj.yawType, ifsave
    )
    plt.show()
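This driver (and the gym environment further down) unpacks thirteen logging arrays from `init_data(quad, traj, ctrl, numTimeStep)`, which is not shown in this excerpt. A minimal sketch of such a helper, assuming it only pre-allocates zeroed arrays sized from the objects it receives (mirroring the explicit allocations in the potential-field driver above):

```python
import numpy as np

def init_data(quad, traj, ctrl, numTimeStep):
    """Pre-allocate the result matrices for one simulation run (assumed helper)."""
    t_all = np.zeros(numTimeStep)
    s_all = np.zeros([numTimeStep, len(quad.state)])
    pos_all = np.zeros([numTimeStep, len(quad.pos)])
    vel_all = np.zeros([numTimeStep, len(quad.vel)])
    quat_all = np.zeros([numTimeStep, len(quad.quat)])
    omega_all = np.zeros([numTimeStep, len(quad.omega)])
    euler_all = np.zeros([numTimeStep, len(quad.euler)])
    sDes_traj_all = np.zeros([numTimeStep, len(traj.sDes)])
    sDes_calc_all = np.zeros([numTimeStep, len(ctrl.sDesCalc)])
    w_cmd_all = np.zeros([numTimeStep, len(ctrl.w_cmd)])
    wMotor_all = np.zeros([numTimeStep, len(quad.wMotor)])
    thr_all = np.zeros([numTimeStep, len(quad.thr)])
    tor_all = np.zeros([numTimeStep, len(quad.tor)])
    return (t_all, s_all, pos_all, vel_all, quat_all, omega_all, euler_all,
            sDes_traj_all, sDes_calc_all, w_cmd_all, wMotor_all, thr_all, tor_all)
```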
class CustomEnv(gym.Env, ABC): def __init__(self): self.default_range = default_range = (-10, 10) self.velocity_range = velocity_range = (-10, 10) self.ang_velocity_range = ang_velocity_range = (-1, 1) self.q_range = qrange = (-0.3, 0.3) self.motor_range = motor_range = (400, 700) self.motor_d_range = motor_d_range = (-2000, 2000) self.observation_space_domain = { "x": default_range, "y": default_range, "z": default_range, "q0": (0.9, 1), "q1": qrange, "q2": qrange, "q3": qrange, "u": velocity_range, "v": velocity_range, "w": velocity_range, "p": ang_velocity_range, "q": ang_velocity_range, "r": ang_velocity_range, "wM1": motor_range, "wdotM1": motor_d_range, "wM2": motor_range, "wdotM2": motor_d_range, "wM3": motor_range, "wdotM3": motor_d_range, "wM4": motor_range, "wdotM4": motor_d_range, "deltacol_p": motor_range, "deltalat_p": motor_range, "deltalon_p": motor_range, "deltaped_p": motor_range, } self.states_str = list(self.observation_space_domain.keys()) # observation space to [-1,1] self.low_obs_space = np.array(tuple( zip(*self.observation_space_domain.values()))[0], dtype=float) * 0 - 1 self.high_obs_space = np.array(tuple( zip(*self.observation_space_domain.values()))[1], dtype=float) * 0 + 1 self.observation_space = spaces.Box(low=self.low_obs_space, high=self.high_obs_space, dtype=float) self.default_act_range = default_act_range = motor_range self.action_space_domain = { "deltacol": default_act_range, "deltalat": default_act_range, "deltalon": default_act_range, "deltaped": default_act_range, # "f1": (0.1, 5), "f2": (0.5, 20), "f3": (0.5, 20), "f4": (0.5, 10), # "lambda1": (0.5, 10), "lambda2": (0.1, 5), "lambda3": (0.1, 5), "lambda4": (0.1, 5), # "eta1": (0.2, 5), "eta2": (0.1, 5), "eta3": (0.1, 5), "eta4": (0.1, 5), } self.low_action = np.array(tuple( zip(*self.action_space_domain.values()))[0], dtype=float) self.high_action = np.array(tuple( zip(*self.action_space_domain.values()))[1], dtype=float) self.low_action_space = self.low_action * 0 - 1 self.high_action_space = self.high_action * 0 + 1 self.action_space = spaces.Box(low=self.low_action_space, high=self.high_action_space, dtype=float) self.min_reward = -11 self.high_action_diff = 0.2 obs_header = str(list(self.observation_space_domain.keys()))[1:-1] act_header = str(list(self.action_space_domain.keys()))[1:-1] self.header = "time, " + act_header + ", " + obs_header + ", reward" + ", control reward" self.saver = save_files() self.reward_array = np.array((0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), dtype=float) self.constant_dict = { "u": 0.0, "v": 0.0, "w": 0.0, "p": 0.0, "q": 0.0, "r": 0.0, "fi": 0.0, "theta": 0.0, "si": 0.0, "x": 0.0, "y": 0.0, "z": 0.0, "a": 0.0, "b": 0.0, "c": 0.0, "d": 0.0, } self.start_time = time.time() self.Ti = 0 self.Ts = 0.005 self.Tf = 5 self.reward_check_time = 0.7 * self.Tf self.ifsave = 0 self.ctrlOptions = ["xyz_pos", "xy_vel_z_pos", "xyz_vel"] self.trajSelect = np.zeros(3) self.ctrlType = self.ctrlOptions[0] self.trajSelect[0] = 0 self.trajSelect[1] = 4 self.trajSelect[2] = 0 self.quad = Quadcopter(self.Ti) self.traj = Trajectory(self.quad, self.ctrlType, self.trajSelect) self.ctrl = Control(self.quad, self.traj.yawType) self.wind = Wind("None", 2.0, 90, -15) self.numTimeStep = int(self.Tf / self.Ts + 1) self.longest_num_step = 0 self.best_reward = float("-inf") self.diverge_counter = 0 self.diverge_list = [] def reset(self): # initialization self.t = 0 self.sDes = self.traj.desiredState(self.t, self.Ts, self.quad) self.ctrl.controller(self.traj, self.quad, self.sDes, self.Ts) 
self.control_input = self.ctrl.w_cmd.copy() ( self.t_all, self.s_all, self.pos_all, self.vel_all, self.quat_all, self.omega_all, self.euler_all, self.sDes_traj_all, self.sDes_calc_all, self.w_cmd_all, self.wMotor_all, self.thr_all, self.tor_all, ) = init_data(self.quad, self.traj, self.ctrl, self.numTimeStep) self.all_actions = np.zeros( (self.numTimeStep, len(self.high_action_space))) self.control_rewards = np.zeros((self.numTimeStep, 1)) self.all_rewards = np.zeros((self.numTimeStep, 1)) self.t_all[0] = self.Ti observation = self.quad.state.copy() observation = np.concatenate((observation, self.control_input), axis=0) self.pos_all[0, :] = self.quad.pos self.vel_all[0, :] = self.quad.vel self.quat_all[0, :] = self.quad.quat self.omega_all[0, :] = self.quad.omega self.euler_all[0, :] = self.quad.euler self.sDes_traj_all[0, :] = self.traj.sDes self.sDes_calc_all[0, :] = self.ctrl.sDesCalc self.w_cmd_all[0, :] = self.control_input self.wMotor_all[0, :] = self.quad.wMotor self.thr_all[0, :] = self.quad.thr self.tor_all[0, :] = self.quad.tor self.counter = 1 self.save_counter = 0 self.find_next_state() self.jj = 0 # self.quad.state[0:12] = self.initial_states = list((np.random.rand(12) * 0.02 - 0.01)) self.quad.state[0:12] = [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0] self.done = False for iii in range(len(self.high_obs_space)): current_range = self.observation_space_domain[self.states_str[iii]] observation[iii] = 2 * (observation[iii] - current_range[0]) / ( current_range[1] - current_range[0]) - 1 self.s_all[0, :] = observation self.integral_error = 0 return list(observation) def action_wrapper(self, current_action: list) -> np.array: current_action = np.clip(current_action, -1, 1) self.normilized_actions = current_action self.control_input = ((current_action + 1) * (self.high_action - self.low_action) / 2 + self.low_action) def find_next_state(self) -> list: self.quad.update(self.t, self.Ts, self.control_input, self.wind) # self.control_input = self.ctrl.w_cmd # self.quad.update(self.t, self.Ts, self.ctrl.w_cmd, self.wind) self.t += self.Ts # self.control_input = self.ctrl.w_cmd.copy() self.sDes = self.traj.desiredState(self.t, self.Ts, self.quad) # print('state', self.quad.state) # print(' ctrl', self.ctrl.w_cmd) self.ctrl.controller(self.traj, self.quad, self.sDes, self.Ts) def observation_function(self) -> list: i = self.counter quad = self.quad self.t_all[i] = self.t self.s_all[i, :] = np.concatenate((quad.state, self.control_input), axis=0) self.pos_all[i, :] = quad.pos self.vel_all[i, :] = quad.vel self.quat_all[i, :] = quad.quat self.omega_all[i, :] = quad.omega self.euler_all[i, :] = quad.euler self.sDes_traj_all[i, :] = self.traj.sDes self.sDes_calc_all[i, :] = self.ctrl.sDesCalc self.w_cmd_all[i, :] = self.control_input self.wMotor_all[i, :] = quad.wMotor self.thr_all[i, :] = quad.thr self.tor_all[i, :] = quad.tor observation = quad.state.copy() observation = np.concatenate((observation, self.control_input), axis=0) for iii in range(len(self.high_obs_space)): current_range = self.observation_space_domain[self.states_str[iii]] observation[iii] = 2 * (observation[iii] - current_range[0]) / ( current_range[1] - current_range[0]) - 1 self.s_all[i, :] = observation return list(observation) def reward_function(self, observation) -> float: # add reward slope to the reward # TODO: normalizing reward # TODO: adding reward gap # error_x = np.linalg.norm((abs(self.quad.state[0:3]).reshape(12)), 1) # error_ang = np.linalg.norm((abs(self.quad.state[3:7]).reshape(12)), 1) # error_v = 
np.linalg.norm((abs(self.quad.state[7:10]).reshape(12)), 1) # error_vang = np.linalg.norm((abs(self.quad.state[10:12]).reshape(12)), 1) # error = -np.linalg.norm((abs(self.s_all[self.counter, 0:12]).reshape(12)), 1) + 0.05 error = 10 * (-np.linalg.norm(observation[0:12], 1) + 1) self.control_rewards[self.counter] = error self.integral_error = 0.1 * ( 0.99 * self.control_rewards[self.counter - 1] + 0.99 * self.integral_error) reward = error.copy() reward += self.integral_error reward += -self.min_reward / self.numTimeStep reward -= 0.000001 * sum( abs(self.control_input - self.w_cmd_all[self.counter - 1, :])) reward += -0.0001 * np.linalg.norm(self.normilized_actions, 2) self.all_rewards[self.counter] = reward # for i in range(12): # self.reward_array[i] = abs(self.current_states[i]) / self.default_range[1] # reward = self.all_rewards[self.counter] = -sum(self.reward_array) + 0.17 / self.default_range[1] # # control reward # reward += 0.05 * float( # self.control_rewards[self.counter] - self.control_rewards[self.counter - 1] # ) # control slope # reward += -0.005 * sum(abs(self.all_actions[self.counter])) # input reward # for i in (self.high_action_diff - self.all_actions[self.counter] - self.all_actions[self.counter - 1]) ** 2: # reward += -min(0, i) if self.counter > 100: print(reward) return reward def check_diverge(self) -> bool: for i in range(len(self.high_obs_space)): if (abs(self.s_all[self.counter, i])) > self.high_obs_space[i]: self.diverge_list.append( (tuple(self.observation_space_domain.keys())[i], self.quad.state[i])) self.saver.diverge_save( tuple(self.observation_space_domain.keys())[i], self.quad.state[i]) self.diverge_counter += 1 if self.diverge_counter == 2000: self.diverge_counter = 0 print((tuple(self.observation_space_domain.keys())[i], self.quad.state[i])) self.jj = 1 return True if self.counter >= self.numTimeStep - 1: # number of timesteps print("successful") return True # after self.reward_check_time it checks whether or not the reward is decreasing # if self.counter > self.reward_check_time / self.Ts: # prev_time = int(self.counter - self.reward_check_time / self.Ts) # diverge_criteria = ( # self.all_rewards[self.counter] - sum(self.all_rewards[prev_time:prev_time - 10]) / 10 # ) # if diverge_criteria < -1: # print("reward_diverge") # self.jj = 1 # return True bool_1 = any(np.isnan(self.quad.state)) bool_2 = any(np.isinf(self.quad.state)) if bool_1 or bool_2: self.jj = 1 print("state_inf_nan_diverge") return False def done_jobs(self) -> None: counter = self.counter self.save_counter += 1 current_total_reward = sum(self.all_rewards) if self.save_counter >= 100: print("current_total_reward: ", current_total_reward) self.save_counter = 0 self.saver.reward_step_save(self.best_reward, self.longest_num_step, current_total_reward, counter) if counter >= self.longest_num_step: self.longest_num_step = counter if current_total_reward >= self.best_reward: self.best_reward = sum(self.all_rewards) ii = self.counter + 1 self.saver.best_reward_save( self.t_all[0:ii], self.w_cmd_all[0:ii], self.s_all[0:ii], self.all_rewards[0:ii], self.control_rewards[0:ii], self.header, ) def step(self, current_action): self.action_wrapper(current_action) try: self.find_next_state() except OverflowError or ValueError or IndexError: self.jj = 1 observation = self.observation_function() reward = self.reward_function(observation) self.done = self.check_diverge() if self.jj == 1: reward = self.min_reward if self.done: self.done_jobs() self.counter += 1 # 
        # self.make_constant(list(self.constant_dict.values()))
        return list(observation), float(reward), self.done, {}

    def make_constant(self, true_list):
        # Pin every state flagged with 1 in constant_dict back to its initial value.
        # Note: this relies on self.initial_states, whose assignment in reset() is commented out.
        for i in range(len(true_list)):
            if true_list[i] == 1:
                self.quad.state[i] = self.initial_states[i]
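`reset()`, `observation_function()`, and `action_wrapper()` all use the same affine mapping between a physical range and [-1, 1]. A small standalone sketch of the two directions of that mapping (the helper names are illustrative, not part of the class):

```python
import numpy as np

def normalize_obs(value, value_range):
    """Map a raw observation from [lo, hi] onto [-1, 1] (as in observation_function)."""
    lo, hi = value_range
    return 2.0 * (value - lo) / (hi - lo) - 1.0

def denormalize_action(action, low_action, high_action):
    """Map an agent action from [-1, 1] back onto [low, high] (as in action_wrapper)."""
    action = np.clip(action, -1.0, 1.0)
    return (action + 1.0) * (high_action - low_action) / 2.0 + low_action

# Example: a z position of 2.5 m with range (-10, 10) maps to 0.25,
# and a zero (mid-scale) action maps back to the middle of the motor range.
print(normalize_obs(2.5, (-10, 10)))                                               # 0.25
print(denormalize_action(np.array([0.0]), np.array([400.0]), np.array([700.0])))   # [550.]
```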
def main(): start_time = time.time() # Simulation Setup # --------------------------- Ti = 0 Ts = 0.005 Tf = 18 ifsave = 0 # Choose trajectory settings # --------------------------- ctrlOptions = ["xyz_pos", "xy_vel_z_pos", "xyz_vel"] trajSelect = np.zeros(3) # Select Control Type (0: xyz_pos, 1: xy_vel_z_pos, 2: xyz_vel) ctrlType = ctrlOptions[0] # Select Position Trajectory Type (0: hover, 1: pos_waypoint_timed, 2: pos_waypoint_interp, # 3: minimum velocity 4: minimum accel, 5: minimum jerk, 6: minimum snap # 7: minimum accel_stop 8: minimum jerk_stop 9: minimum snap_stop # 10: minimum jerk_full_stop 11: minimum snap_full_stop trajSelect[0] = 6 # Select Yaw Trajectory Type (0: none 1: yaw_waypoint_timed, 2: yaw_waypoint_interp 3: follow 4: zero) trajSelect[1] = 3 # Select if waypoint time is used, or if average speed is used to calculate waypoint time (0: waypoint time, 1: average speed) trajSelect[2] = 0 print("Control type: {}".format(ctrlType)) # Initialize Quadcopter, Controller, Wind, Result Matrixes # --------------------------- quad = Quadcopter(Ti) traj = Trajectory(quad, ctrlType, trajSelect) ctrl = Control(quad, traj.yawType) wind = Wind('None', 2.0, 90, -15) # Trajectory for First Desired States # --------------------------- sDes = traj.desiredState(0, Ts, quad) # Generate First Commands # --------------------------- ctrl.controller(traj, quad, sDes, Ts) # Initialize Result Matrixes # --------------------------- t_all = Ti s_all = quad.state.T pos_all = quad.pos.T vel_all = quad.vel.T quat_all = quad.quat.T omega_all = quad.omega.T euler_all = quad.euler.T sDes_traj_all = traj.sDes.T sDes_calc_all = ctrl.sDesCalc.T w_cmd_all = ctrl.w_cmd.T wMotor_all = quad.wMotor.T thr_all = quad.thr.T tor_all = quad.tor.T # Run Simulation # --------------------------- t = Ti i = 0 while round(t, 3) < Tf: t = quad_sim(t, Ts, quad, ctrl, wind, traj) # print("{:.3f}".format(t)) t_all = np.vstack((t_all, t)) s_all = np.vstack((s_all, quad.state.T)) pos_all = np.vstack((pos_all, quad.pos.T)) vel_all = np.vstack((vel_all, quad.vel.T)) quat_all = np.vstack((quat_all, quad.quat.T)) omega_all = np.vstack((omega_all, quad.omega.T)) euler_all = np.vstack((euler_all, quad.euler.T)) sDes_traj_all = np.vstack((sDes_traj_all, traj.sDes.T)) sDes_calc_all = np.vstack((sDes_calc_all, ctrl.sDesCalc.T)) w_cmd_all = np.vstack((w_cmd_all, ctrl.w_cmd.T)) wMotor_all = np.vstack((wMotor_all, quad.wMotor.T)) thr_all = np.vstack((thr_all, quad.thr.T)) tor_all = np.vstack((tor_all, quad.tor.T)) i += 1 end_time = time.time() print("Simulated {:.2f}s in {:.6f}s.".format(t, end_time - start_time)) # View Results # --------------------------- # utils.fullprint(sDes_traj_all[:,3:6]) utils.makeFigures(quad.params, t_all, pos_all, vel_all, quat_all, omega_all, euler_all, w_cmd_all, wMotor_all, thr_all, tor_all, sDes_traj_all, sDes_calc_all) ani = utils.sameAxisAnimation(t_all, traj.wps, pos_all, quat_all, sDes_traj_all, Ts, quad.params, traj.xyzType, traj.yawType, ifsave) plt.show()
def main(): start_time = time.time() # Import the simulation setup # --------------------------- config = simConfig.config() # Initialize wind (untested) # -------------------------- wind = Wind('None', 2.0, 90, -15) # Initialize list for objects # --------------------------- numTimeStep = int(config.Tf / config.Ts + 1) quadList = [] trajList = [] ctrlList = [] sDesList = [] obsPFList = [] myDataList = [] # For the number of vehicles # -------------------------- for objectIndex in range(0, config.nVeh): quadList.append(Quadcopter(config)) trajList.append( Trajectory(quadList[objectIndex], config.ctrlType, config.trajSelect, config)) ctrlList.append( Control(quadList[objectIndex], trajList[objectIndex].yawType)) sDesList.append(trajList[objectIndex].desiredState( 0, config.Ts, quadList[objectIndex])) obsPFList.append( pf(trajList[objectIndex], np.vstack((config.o1, config.o2, config.o3, quadList[objectIndex - 1].state[0:3])).transpose(), gamma=config.gamma, eta=config.eta, obsRad=config.obsRad, obsRange=config.obsRange)) # generate first command ctrlList[objectIndex].controller(trajList[objectIndex], quadList[objectIndex], sDesList[objectIndex], config) # Create learning object # --------------------------- fala = falaObj(config) # Initialize Result Matrixes # --------------------------- for objectIndex in range(0, config.nVeh): # initialize result matrices myDataList.append( collect.quadata(quadList[objectIndex], trajList[objectIndex], ctrlList[objectIndex], fala, config.Ti, numTimeStep)) # Run Simulation # --------------------------- t = config.Ti * np.ones(config.nVeh) i = 1 while round(t[0], 3) < config.Tf: # Update the obstacle positions # ----------------------------- if t[0] > 0.1: # recall these, as they could move with time o1 = config.o1 # np.array([-2.1, 0, -3],) # obstacle 1 (x,y,z) o2 = config.o2 # np.array([2, -1.2, 0.9]) # obstacle 2 (x,y,z) o3 = config.o3 # np.array([0, 2.5, -2.5]) # obstacle 2 (x,y,z) # if just one vehicle if config.nVeh == 1: obsPFList[0].Po = np.vstack((o1, o2, o3)).transpose() # if two vehicles, make sure to avoid other dudeBot #if config.nVeh == 2: # obsPFList[0].Po = np.vstack((o1,o2,o3,quadList[1].state[0:3])).transpose() # obsPFList[1].Po = np.vstack((o1,o2,o3,quadList[0].state[0:3])).transpose() # if more than one vehicle, make sure to avoid other dudeBot for io in range(0, config.nVeh): for jo in range(0, config.nVeh - 1): obsPFList[io].Po = np.vstack( (o1, o2, o3, quadList[io - jo].state[0:3])).transpose() # Integrate through the dynamics # ------------------------------ for objectIndex in range(0, config.nVeh): t[objectIndex] = quad_sim(t[objectIndex], config.Ts, quadList[objectIndex], ctrlList[objectIndex], wind, trajList[objectIndex], fala, obsPFList[objectIndex], config) myDataList[objectIndex].collect(t[objectIndex], quadList[objectIndex], trajList[objectIndex], ctrlList[objectIndex], fala, i) i += 1 end_time = time.time() print("Simulated {:.2f}s in {:.6f}s.".format(t[0], end_time - start_time)) # View Results # --------------------------- if config.ifsave: # make figures utils.makeFigures(quadList[0].params, myDataList[0]) # make animation (this should be generalized later) if config.ifsaveplots: if config.nVeh == 1: ani = sameAxisAnimation(config, myDataList[0], trajList[0], quadList[0].params, obsPFList[0], myColour='blue') #if config.nVeh == 2: # ani = sameAxisAnimation2(config, myDataList[0], trajList[0], quadList[0].params, myDataList[1], trajList[1], quadList[1].params, obsPFList[0], 'blue', 'green') if config.nVeh > 1: ani = 
sameAxisAnimationN(config, myDataList, trajList, quadList, obsPFList[0], 'blue') plt.show() # dump the learned parameters if config.doLearn: np.savetxt("Data/Qtable.csv", fala.Qtable, delimiter=",", header=" ") np.savetxt("Data/errors.csv", myDataList[0].falaError_all, delimiter=",", header=" ") # save energy draw torque0 = myDataList[0].tor_all eDraw0 = np.cumsum(np.cumsum(torque0, axis=0), axis=1)[:, 3] np.save('Data/energyDepletion_veh0', eDraw0) if config.nVeh == 2: torque1 = myDataList[1].tor_all eDraw1 = np.cumsum(np.cumsum(torque1, axis=0), axis=1)[:, 3] np.save('Data/energyDepletion_veh1', eDraw1) # save the data if config.ifsavedata: #save states x = myDataList[0].pos_all[:, 0] y = myDataList[0].pos_all[:, 1] z = myDataList[0].pos_all[:, 2] x_sp = myDataList[0].sDes_calc_all[:, 0] y_sp = myDataList[0].sDes_calc_all[:, 1] z_sp = myDataList[0].sDes_calc_all[:, 2] states0 = np.vstack([x, y, z, x_sp, y_sp, z_sp]).transpose() np.save('Data/states0', states0) if config.nVeh == 2: x = myDataList[1].pos_all[:, 0] y = myDataList[1].pos_all[:, 1] z = myDataList[1].pos_all[:, 2] x_sp = myDataList[1].sDes_calc_all[:, 0] y_sp = myDataList[1].sDes_calc_all[:, 1] z_sp = myDataList[1].sDes_calc_all[:, 2] states1 = np.vstack([x, y, z, x_sp, y_sp, z_sp]).transpose() np.save('Data/states1', states1)
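Inside the simulation loop above, each vehicle's potential-field object has its obstacle matrix `Po` rebuilt from the three static obstacles plus the positions of the other vehicles. A minimal sketch of that construction for an arbitrary number of vehicles (the helper name and signature are illustrative, not the repo's API):

```python
import numpy as np

def build_obstacle_matrix(static_obs, quad_list, own_index):
    """Stack static obstacles with every *other* vehicle's position as a 3xN matrix.

    static_obs: iterable of (x, y, z) arrays, e.g. (config.o1, config.o2, config.o3)
    quad_list:  list of Quadcopter objects whose state[0:3] is the position
    own_index:  index of the vehicle building its own obstacle list
    """
    others = [q.state[0:3] for j, q in enumerate(quad_list) if j != own_index]
    return np.vstack(list(static_obs) + others).transpose()

# Usage sketch inside the loop (hypothetical):
# for io in range(config.nVeh):
#     obsPFList[io].Po = build_obstacle_matrix((o1, o2, o3), quadList, io)
```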
class Quadcopter: ''' In order to implement the drones, both agents and enemies, the Quad class was created. This is a file that is used to firstly initialize the drones as independent objects and to assign and update their attributes. This method of implementation allowed a very easy and intuitive use of the code, as well as an unlimited scalability of the SWARM. For the initialization to happen several inputs need to be provided. The main ones are the drone identification number “quad_id”, the keyword “mode”, the target identification number “id_targ”, the goal position “pos_goal” and the position of static obstacles “pos_obs”. ''' def __init__(self, Ti, Tf, ctrlType, trajSelect, numTimeStep, Ts, quad_id=None, mode='ennemy', id_targ=-1, color='blue', pos_ini=[0, 0, 0], pos_goal=[1, 1, -1], pos_obs=None): ''' It is used to store in the basic attributes the inputs already mentioned in the previous sections. In the case of the one employed by Python simulation environment, the command for an initial stable hover and the initial state of the drone are initialized. For that, the dynamics, kinetics, kinematics and control are also loaded. Finally, the storage of drone data is pre-allocated. ''' # Quad Params # --------------------------- self.quad_id = quad_id self.pos_ini = pos_ini self.pos_goal = pos_goal self.pos_goal_ini = pos_goal self.pos_obs = pos_obs self.id_targ = id_targ self.pos_quads = None self.mode = mode self.mode_ini = mode self.neutralize = False self.ctrlType = ctrlType self.trajSelect = trajSelect self.params = sys_params() self.t_update = Ti self.t_track = 0 self.Ti = Ti self.Ts = Ts self.Tf = Tf # Command for initial stable hover # --------------------------- ini_hover = init_cmd(self.params) self.params["FF"] = ini_hover[0] # Feed-Forward Command for Hover self.params["w_hover"] = ini_hover[1] # Motor Speed for Hover self.params["thr_hover"] = ini_hover[2] # Motor Thrust for Hover self.thr = np.ones(4) * ini_hover[2] self.tor = np.ones(4) * ini_hover[3] # Initial State # --------------------------- self.state = init_state(self.params, pos_ini) self.pos = self.state[0:3] self.quat = self.state[3:7] self.vel = self.state[7:10] self.omega = self.state[10:13] self.wMotor = np.array( [self.state[13], self.state[15], self.state[17], self.state[19]]) self.vel_dot = np.zeros(3) self.omega_dot = np.zeros(3) self.acc = np.zeros(3) self.previous_pos_targ = self.pos self.previous_pos_us = self.pos self.extended_state() self.forces() # Set Integrator # --------------------------- self.integrator = ode(self.state_dot).set_integrator( 'dopri5', first_step='0.00005', atol='10e-6', rtol='10e-6') self.integrator.set_initial_value(self.state, Ti) self.traj = Trajectory(self.psi, ctrlType, trajSelect, pos_ini=self.pos, pos_goal=pos_goal, pos_obs=pos_obs, Tf=self.Tf, t_ini=0) self.ctrl = Control(self.params["w_hover"], self.traj.yawType) self.t_all = np.zeros(numTimeStep) self.s_all = np.zeros([numTimeStep, len(self.state)]) self.pos_all = np.zeros([numTimeStep, len(self.pos)]) self.vel_all = np.zeros([numTimeStep, len(self.vel)]) self.quat_all = np.zeros([numTimeStep, len(self.quat)]) self.omega_all = np.zeros([numTimeStep, len(self.omega)]) self.euler_all = np.zeros([numTimeStep, len(self.euler)]) self.sDes_traj_all = np.zeros([numTimeStep, len(self.traj.sDes)]) self.sDes_calc_all = np.zeros([numTimeStep, len(self.ctrl.sDesCalc)]) self.w_cmd_all = np.zeros([numTimeStep, len(self.ctrl.w_cmd)]) self.wMotor_all = np.zeros([numTimeStep, len(self.wMotor)]) self.thr_all = np.zeros([numTimeStep, 
len(self.thr)]) self.tor_all = np.zeros([numTimeStep, len(self.tor)]) self.color = color self.pf_map_all = np.empty(numTimeStep, dtype=list) def extended_state(self): ''' Only for the python environment. It computes the rotation matrix of the current state and its Euler angles too. ''' # Rotation Matrix of current state (Direct Cosine Matrix) self.dcm = utils.quat2Dcm(self.quat) # Euler angles of current state YPR = utils.quatToYPR_ZYX(self.quat) self.euler = YPR[:: -1] # flip YPR so that euler state = phi, theta, psi self.psi = YPR[0] self.theta = YPR[1] self.phi = YPR[2] def forces(self): ''' Only for the python environment. It computes the rotor thrusts and torques. ''' # Rotor thrusts and torques self.thr = self.params["kTh"] * self.wMotor * self.wMotor self.tor = self.params["kTo"] * self.wMotor * self.wMotor def state_dot(self, t, state, cmd, wind): ''' Only for the python environment. It calls the dynamic parameters of each drones in terms of mass, inertia and damping. It also calls the aerodynamic and propulsion coefficients such as drag, thrust or torque. It imports the state vector of each drone, in terms of position, orientation, velocity or angular rate. After that, it studies the motor dynamics and rotor forces. The wind model is also called here. Finally, it solves the state derivatives. ''' # Import Params # --------------------------- mB = self.params["mB"] g = self.params["g"] dxm = self.params["dxm"] dym = self.params["dym"] IB = self.params["IB"] IBxx = IB[0, 0] IByy = IB[1, 1] IBzz = IB[2, 2] Cd = self.params["Cd"] kTh = self.params["kTh"] kTo = self.params["kTo"] tau = self.params["tau"] kp = self.params["kp"] damp = self.params["damp"] minWmotor = self.params["minWmotor"] maxWmotor = self.params["maxWmotor"] IRzz = self.params["IRzz"] if (config.usePrecession): uP = 1 else: uP = 0 # Import State Vector # --------------------------- x = state[0] y = state[1] z = state[2] q0 = state[3] q1 = state[4] q2 = state[5] q3 = state[6] xdot = state[7] ydot = state[8] zdot = state[9] p = state[10] q = state[11] r = state[12] wM1 = state[13] wdotM1 = state[14] wM2 = state[15] wdotM2 = state[16] wM3 = state[17] wdotM3 = state[18] wM4 = state[19] wdotM4 = state[20] # Motor Dynamics and Rotor forces (Second Order System: https://apmonitor.com/pdc/index.php/Main/SecondOrderSystems) # --------------------------- uMotor = cmd wddotM1 = (-2.0 * damp * tau * wdotM1 - wM1 + kp * uMotor[0]) / (tau** 2) wddotM2 = (-2.0 * damp * tau * wdotM2 - wM2 + kp * uMotor[1]) / (tau** 2) wddotM3 = (-2.0 * damp * tau * wdotM3 - wM3 + kp * uMotor[2]) / (tau** 2) wddotM4 = (-2.0 * damp * tau * wdotM4 - wM4 + kp * uMotor[3]) / (tau** 2) wMotor = np.array([wM1, wM2, wM3, wM4]) wMotor = np.clip(wMotor, minWmotor, maxWmotor) thrust = kTh * wMotor * wMotor torque = kTo * wMotor * wMotor ThrM1 = thrust[0] ThrM2 = thrust[1] ThrM3 = thrust[2] ThrM4 = thrust[3] TorM1 = torque[0] TorM2 = torque[1] TorM3 = torque[2] TorM4 = torque[3] # Wind Model # --------------------------- [velW, qW1, qW2] = wind.randomWind(t) # velW = 0 # velW = 5 # m/s # qW1 = 0*deg2rad # Wind heading # qW2 = 60*deg2rad # Wind elevation (positive = upwards wind in NED, positive = downwards wind in ENU) # State Derivatives (from PyDy) This is already the analytically solved vector of MM*x = RHS # --------------------------- if (config.orient == "NED"): DynamicsDot = np.array([ [xdot], [ydot], [zdot], [-0.5 * p * q1 - 0.5 * q * q2 - 0.5 * q3 * r], [0.5 * p * q0 - 0.5 * q * q3 + 0.5 * q2 * r], [0.5 * p * q3 + 0.5 * q * q0 - 0.5 * q1 * r], [-0.5 * p 
* q2 + 0.5 * q * q1 + 0.5 * q0 * r], [(Cd * sign(velW * cos(qW1) * cos(qW2) - xdot) * (velW * cos(qW1) * cos(qW2) - xdot)**2 - 2 * (q0 * q2 + q1 * q3) * (ThrM1 + ThrM2 + ThrM3 + ThrM4)) / mB], [(Cd * sign(velW * sin(qW1) * cos(qW2) - ydot) * (velW * sin(qW1) * cos(qW2) - ydot)**2 + 2 * (q0 * q1 - q2 * q3) * (ThrM1 + ThrM2 + ThrM3 + ThrM4)) / mB], [(-Cd * sign(velW * sin(qW2) + zdot) * (velW * sin(qW2) + zdot)**2 - (ThrM1 + ThrM2 + ThrM3 + ThrM4) * (q0**2 - q1**2 - q2**2 + q3**2) + g * mB) / mB], [ ((IByy - IBzz) * q * r - uP * IRzz * (wM1 - wM2 + wM3 - wM4) * q + (ThrM1 - ThrM2 - ThrM3 + ThrM4) * dym) / IBxx ], # uP activates or deactivates the use of gyroscopic precession. [ ((IBzz - IBxx) * p * r + uP * IRzz * (wM1 - wM2 + wM3 - wM4) * p + (ThrM1 + ThrM2 - ThrM3 - ThrM4) * dxm) / IByy ], # Set uP to False if rotor inertia is not known (gyro precession has negigeable effect on drone dynamics) [((IBxx - IByy) * p * q - TorM1 + TorM2 - TorM3 + TorM4) / IBzz ] ]) elif (config.orient == "ENU"): DynamicsDot = np.array([ [xdot], [ydot], [zdot], [-0.5 * p * q1 - 0.5 * q * q2 - 0.5 * q3 * r], [0.5 * p * q0 - 0.5 * q * q3 + 0.5 * q2 * r], [0.5 * p * q3 + 0.5 * q * q0 - 0.5 * q1 * r], [-0.5 * p * q2 + 0.5 * q * q1 + 0.5 * q0 * r], [(Cd * sign(velW * cos(qW1) * cos(qW2) - xdot) * (velW * cos(qW1) * cos(qW2) - xdot)**2 + 2 * (q0 * q2 + q1 * q3) * (ThrM1 + ThrM2 + ThrM3 + ThrM4)) / mB], [(Cd * sign(velW * sin(qW1) * cos(qW2) - ydot) * (velW * sin(qW1) * cos(qW2) - ydot)**2 - 2 * (q0 * q1 - q2 * q3) * (ThrM1 + ThrM2 + ThrM3 + ThrM4)) / mB], [(-Cd * sign(velW * sin(qW2) + zdot) * (velW * sin(qW2) + zdot)**2 + (ThrM1 + ThrM2 + ThrM3 + ThrM4) * (q0**2 - q1**2 - q2**2 + q3**2) - g * mB) / mB], [ ((IByy - IBzz) * q * r + uP * IRzz * (wM1 - wM2 + wM3 - wM4) * q + (ThrM1 - ThrM2 - ThrM3 + ThrM4) * dym) / IBxx ], # uP activates or deactivates the use of gyroscopic precession. [ ((IBzz - IBxx) * p * r - uP * IRzz * (wM1 - wM2 + wM3 - wM4) * p + (-ThrM1 - ThrM2 + ThrM3 + ThrM4) * dxm) / IByy ], # Set uP to False if rotor inertia is not known (gyro precession has negigeable effect on drone dynamics) [((IBxx - IBzz) * p * q + TorM1 - TorM2 + TorM3 - TorM4) / IBzz ] ]) # State Derivative Vector # --------------------------- sdot = np.zeros([21]) sdot[0] = DynamicsDot[0] sdot[1] = DynamicsDot[1] sdot[2] = DynamicsDot[2] sdot[3] = DynamicsDot[3] sdot[4] = DynamicsDot[4] sdot[5] = DynamicsDot[5] sdot[6] = DynamicsDot[6] sdot[7] = DynamicsDot[7] sdot[8] = DynamicsDot[8] sdot[9] = DynamicsDot[9] sdot[10] = DynamicsDot[10] sdot[11] = DynamicsDot[11] sdot[12] = DynamicsDot[12] sdot[13] = wdotM1 sdot[14] = wddotM1 sdot[15] = wdotM2 sdot[16] = wddotM2 sdot[17] = wdotM3 sdot[18] = wddotM3 sdot[19] = wdotM4 sdot[20] = wddotM4 self.acc = sdot[7:10] return sdot def print_mode(self, mode_prev, mode_new): ''' Only for the python environment. It updates the mode of the drone by printing the identification number of the drone, its previous mode and its current one. ''' print("Drone {0} switch from {1} to {2}.".format( self.quad_id, mode_prev, mode_new)) def update(self, t, Ts, wind, i, quad): ''' It is used to update the attributes of each drone. First of all, the Boolean must be changed into true for specific modes and to false for others following their nature. If the initial mode is “enemy”, no update is required as neither collision avoidance or tracking features are activated. 
        However, in the Python environment, if the target becomes neutralized and its mode is
        changed into “fall”, the goal position is updated once to its initial goal position with
        null height, to simulate its crash. This is done by the MAVLink command “stabilize” in
        the more complex environment. If the initial mode is not “enemy” and the Boolean is
        true, then the update occurs.

        In the case of an agent in “guided” mode, it updates the positions of the other drones
        as dynamic obstacles to enhance collision avoidance. For that, it compares the
        identification number of each drone with its own and discards any match. The obstacle
        positions are then stored together in a temporary array containing both the static and
        the dynamic ones. After that, the guidance algorithm is called and the trajectory is
        updated. For an automated transition of modes, a distance threshold can be employed to
        consider the goal position reached: the norm of the difference between the current
        position and the goal position is computed and compared with that threshold, and if it
        is smaller, the mode is switched to “home”.

        In the case of an agent in “track” mode, it updates both the positions of the other
        drones (for collision avoidance) and its goal position. Collision avoidance works as in
        “guided” mode, except that the agent discards the identification numbers matching both
        its own and that of the target it is tasked to follow. As for the tracking itself, the
        target position is assumed to be provided by the Situation Awareness team; in the
        initial stages it is extracted directly from the environment. For basic tracking, the
        goal position is taken to be the target position with an additional half meter in the
        z axis, so that the agent hovers over it; this means the agent will never hover exactly
        on the target. For improved tracking, the future position of the target is estimated
        using a constant-velocity assumption, with the discretized velocity computed from the
        target's current and past positions. This estimate could be improved further by
        including the continuous velocity or acceleration, or by tuning its parameters. The
        estimate, after adding the hovering distance, is used for the path planning.

        To check whether a target has been neutralized, two thresholds are needed: one on time
        and one on the distance between the tracking agent and its target. In this project the
        thresholds are set to 1 m for 1.5 s (three times the update time). If both are met, the
        Boolean neutralize becomes true; it is later used to change the target mode from
        “enemy” to “neutralized”. Once again, the normal automated transition for the agent is
        to “home” mode.

        Of the remaining modes associated with actions, the longer ones can also include
        collision avoidance and therefore require updating. This is the case for “home”, “land”
        and “hover”, but not for “charging” or “takeoff”. Nonetheless, in the more complex
        environment most of these are predefined and can be sent directly as MAVLink commands.
        After the mode and goal position are updated, the trajectories are overwritten. For
        that, the current states are updated and the commands recomputed.
''' if (self.t_update > self.Tf) or (t == 0): update = True self.t_update = 0 else: update = False if self.mode == "fall": self.pos_goal = np.hstack([self.pos[0], self.pos[1], -0.5]).astype(float) self.traj = Trajectory(self.psi, self.ctrlType, self.trajSelect, pos_ini=self.pos, pos_goal=self.pos_goal, pos_obs=np.array([[0, 0, 0]]), Tf=self.Tf, t_ini=t) self.ctrl = Control(self.params["w_hover"], self.traj.yawType) self.mode = "neutralized" self.print_mode("ennemy", "fall") # suggestion for traking algos: if (self.mode != 'ennemy' and update): if self.mode == 'track': pos_dyn_obs = [ x[1] for x in self.pos_quads if (x[0] != self.id_targ and x[0] != self.quad_id) ] try: temp_pos_obs = np.vstack( (self.pos_obs, pos_dyn_obs)).astype(float) except: temp_pos_obs = np.vstack((self.pos_obs)).astype(float) pos_targ = [ x[1] for x in self.pos_quads if x[0] == self.id_targ ][0] # mix desired velocity vector with the velocity of the target # lectures on coop guidance P16 ==> modif for moving target (it will add a gain) # need to understand the guidance command we are using # derivative ==> more reactivness # otherwise moddif wp but stab issues # ask SA if they can provide us the velocity of the targets, better than discretize estimation estimate_pos_targ = 3 * pos_targ - 2 * self.previous_pos_targ self.pos_goal = np.hstack((estimate_pos_targ) + [0, 0, 1.5]).astype(float) dist = np.sqrt( (self.previous_pos_us[0] - self.previous_pos_targ[0])**2 + (self.previous_pos_us[1] - self.previous_pos_targ[1])**2 + (self.previous_pos_us[2] - self.previous_pos_targ[2])**2) print(dist) if dist < 1: self.t_track += Ts if self.t_track > 3 * Ts: self.neutralize = True self.mode = "home" self.print_mode("track", "home") self.t_track = t - Ts else: self.t_track = 0 self.previous_pos_targ = pos_targ self.previous_pos_us = self.pos if self.mode == 'guided': try: pos_dyn_obs = [ x[1] for x in self.pos_quads if (x[0] != self.quad_id) ] temp_pos_obs = np.vstack( (self.pos_obs, pos_dyn_obs)).astype(float) except: temp_pos_obs = np.vstack((self.pos_obs)).astype(float) dist = np.sqrt((self.pos[0] - self.pos_goal[0])**2 + (self.pos[1] - self.pos_goal[1])**2 + (self.pos[2] - self.pos_goal[2])**2) if dist < 1: self.mode = "home" self.print_mode("guided", "home") if self.mode == 'home': pos_dyn_obs = [ x[1] for x in self.pos_quads if (x[0] != self.quad_id) ] temp_pos_obs = np.vstack( (self.pos_obs, pos_dyn_obs)).astype(float) self.pos_goal = np.hstack(self.pos_ini).astype(float) dist = np.sqrt((self.pos[0] - self.pos_goal[0])**2 + (self.pos[1] - self.pos_goal[1])**2 + (self.pos[2] - self.pos_goal[2])**2) if dist < 1: self.mode = "land" self.print_mode("home", "land") if self.mode == 'land': pos_dyn_obs = [ x[1] for x in self.pos_quads if (x[0] != self.quad_id) ] temp_pos_obs = np.vstack( (self.pos_obs, pos_dyn_obs)).astype(float) self.pos_goal = np.hstack([self.pos[0], self.pos[1], -0.5]).astype(float) if self.mode == 'hover': pos_dyn_obs = [ x[1] for x in self.pos_quads if (x[0] != self.quad_id) ] temp_pos_obs = np.vstack( (self.pos_obs, pos_dyn_obs)).astype(float) self.pos_goal = np.hstack(self.pos).astype(float) self.traj = Trajectory(self.psi, self.ctrlType, self.trajSelect, pos_ini=self.pos, pos_goal=self.pos_goal, pos_obs=temp_pos_obs, Tf=self.Tf, t_ini=t) self.ctrl = Control(self.params["w_hover"], self.traj.yawType) prev_vel = self.vel prev_omega = self.omega self.sDes = self.traj.desiredState(t, Ts, self.pos) self.integrator.set_f_params(self.ctrl.w_cmd, wind) self.state = self.integrator.integrate(t, t + Ts) self.pos = 
self.state[0:3] self.quat = self.state[3:7] self.vel = self.state[7:10] self.omega = self.state[10:13] self.wMotor = np.array( [self.state[13], self.state[15], self.state[17], self.state[19]]) self.vel_dot = (self.vel - prev_vel) / Ts self.omega_dot = (self.omega - prev_omega) / Ts self.extended_state() self.forces() self.t_all[i] = t self.s_all[i, :] = self.state self.pos_all[i, :] = self.pos self.vel_all[i, :] = self.vel self.quat_all[i, :] = self.quat self.omega_all[i, :] = self.omega self.euler_all[i, :] = self.euler self.sDes_traj_all[i, :] = self.traj.sDes self.sDes_calc_all[i, :] = self.ctrl.sDesCalc self.w_cmd_all[i, :] = self.ctrl.w_cmd self.wMotor_all[i, :] = self.wMotor self.thr_all[i, :] = self.thr self.tor_all[i, :] = self.tor self.pf_map_all[i] = self.traj.data # Trajectory for Desired States # --------------------------- self.sDes = self.traj.desiredState(t, Ts, self.pos) # Generate Commands (for next iteration) # --------------------------- self.ctrl.controller(self.traj, quad, self.sDes, Ts) self.t_update += Ts def init(self, Ts, quad): ''' It is employed to input the first value of the attributes after initialization in the python environment. ''' # Trajectory for First Desired States # --------------------------- self.sDes = self.traj.desiredState(0, Ts, self.pos) # Generate First Commands # --------------------------- self.ctrl.controller(self.traj, quad, self.sDes, Ts) self.t_all[0] = self.Ti self.s_all[0, :] = self.state self.pos_all[0, :] = self.pos self.vel_all[0, :] = self.vel self.quat_all[0, :] = self.quat self.omega_all[0, :] = self.omega self.euler_all[0, :] = self.euler self.sDes_traj_all[0, :] = self.traj.sDes self.sDes_calc_all[0, :] = self.ctrl.sDesCalc self.w_cmd_all[0, :] = self.ctrl.w_cmd self.wMotor_all[0, :] = self.wMotor self.thr_all[0, :] = self.thr self.tor_all[0, :] = self.tor self.pf_map_all[0] = self.traj.data
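The “track” branch of `update()` predicts the target's next position by extrapolating two steps ahead under the constant-velocity assumption (`3 * pos_targ - 2 * previous_pos_targ`), adds a hover offset, and declares the target neutralized once the agent has stayed inside the distance threshold for three consecutive update periods. A small self-contained sketch of that logic, pulled out of the class for clarity (names, default offset, and thresholds follow the code above but are illustrative):

```python
import numpy as np

def estimate_target_goal(pos_targ, prev_pos_targ, hover_offset=np.array([0.0, 0.0, 1.5])):
    """Constant-velocity extrapolation used for tracking: two steps ahead plus a hover offset."""
    estimate = 3.0 * np.asarray(pos_targ) - 2.0 * np.asarray(prev_pos_targ)
    return estimate + hover_offset

def update_neutralize_timer(dist, t_track, Ts, dist_threshold=1.0, n_updates=3):
    """Accumulate time spent within dist_threshold; neutralize after n_updates periods."""
    if dist < dist_threshold:
        t_track += Ts
        return t_track, t_track > n_updates * Ts
    return 0.0, False

# Example: target moved from (0, 0, -2) to (0.1, 0, -2) over one update -> predicted goal
print(estimate_target_goal([0.1, 0.0, -2.0], [0.0, 0.0, -2.0]))   # [ 0.3  0.  -0.5]
```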