def __init__(self, stepweite=0.01): super(WrapperOpenAI, self).__init__() # Define action and observation space # They must be gym.spaces objects # nur aileron high_action_space = np.array([1.], dtype=np.float32) self.action_space = spaces.Box(low=-high_action_space, high=high_action_space, dtype=np.float32) # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error high_observation_space = np.array([np.inf, np.inf, np.inf], dtype=np.float32) self.observation_space = spaces.Box(low=-high_observation_space, high=high_observation_space, dtype=np.float32) # reward self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32) # spezielle Parameter für das Enviroment FDM self.aircraft = Aircraft_baever() # Ball oder C172 self.pid = PidRegler() self.dynamicSystem = DynamicSystem6DoF() self.stepweite = stepweite self.udpClient = UdpClient('127.0.0.1', 5566) # frage: ist das die richtige Stelle, oder besser im DDPG-Controller self.targetValues = { 'targetPhi_grad': 0, 'targetTheta_grad': 0, 'targetPsi': 0, 'targetSpeed': 0, 'target_z_dot': 0.0 } self.envelopeBounds = { 'phiMax_grad': 30, 'phiMin_grad': -30, 'thetaMax_grad': 30, 'thetaMin_grad': -30, 'speedMax': 72, 'speedMin': 33 } # fuer plotten self.plotter = PlotState() self.anzahlSteps = 0 self.anzahlEpisoden = 0 self.observationErrorAkkumulation = np.zeros(3) self.integration_error_stepsize_ = 0 self.servo_command = 0 self.action_servo_command_history = np.zeros(2) self.bandbreite_servo_actions = 0
def __init__(self, stepweite=0.05): super(WrapperOpenAI, self).__init__() # Define action and observation space # They must be gym.spaces objects # nur aileron high_action_space = np.array([1.], dtype=np.float32) self.action_space = spaces.Box(low=-high_action_space, high=high_action_space, dtype=np.float32) # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error high_observation_space = np.array([np.inf, np.inf, np.inf, np.inf, np.inf], dtype=np.float32) self.observation_space = spaces.Box(low=-high_observation_space, high=high_observation_space, dtype=np.float32) # reward self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32) # spezielle Parameter für das Enviroment FDM #self.aircraft = Aircraft(config.geometrieClass) # Ball oder C172 self.aircraft_beaver = Aircraft_baever() self.pid = PidRegler() self.dynamicSystem = DynamicSystem6DoF() self.umrechnungenKoordinaten = UmrechnungenKoordinaten() self.stepweite = stepweite self.udpClient = UdpClient('127.0.0.1', 5566) # frage: ist das die richtige Stelle, oder besser im DDPG-Controller self.targetValues = {'targetPhi': 0, 'targetTheta': 0, 'targetPsi': 0, 'targetSpeed': 0, 'target_z_dot': 0.0} self.envelopeBounds = {'phiMax': 20, 'phiMin': -20, 'thetaMax': 10, 'thetaMin': -30, 'speedMax': 80, 'speedMin': 33 } self.integration_stepsize = 3 self.error_array_u = np.zeros(self.integration_stepsize) self.integration_error_u = 0 self.error_array_theta = np.zeros(self.integration_stepsize) self.integration_error_theta = 0 # fuer plotten self.plotter = PlotState() self.anzahlSteps = 0 self.anzahlEpisoden = 0
class WrapperOpenAI(gym.Env): """Custom Environment that follows gym interface""" metadata = {'render.modes': ['human']} def __init__(self, stepweite=0.01): super(WrapperOpenAI, self).__init__() # Define action and observation space # They must be gym.spaces objects # nur aileron high_action_space = np.array([1.], dtype=np.float32) self.action_space = spaces.Box(low=-high_action_space, high=high_action_space, dtype=np.float32) # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error high_observation_space = np.array( [np.inf, np.inf, np.inf, np.inf, np.inf], dtype=np.float32) self.observation_space = spaces.Box(low=-high_observation_space, high=high_observation_space, dtype=np.float32) # reward self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32) # spezielle Parameter für das Enviroment FDM self.aircraft = Aircraft(config.geometrieClass) # Ball oder C172 self.pid = PidRegler() self.dynamicSystem = DynamicSystem6DoF() self.stepweite = stepweite self.udpClient = UdpClient('127.0.0.1', 5566) # frage: ist das die richtige Stelle, oder besser im DDPG-Controller self.targetValues = { 'targetPhi_grad': 0, 'targetTheta_grad': 0, 'targetPsi': 0, 'targetSpeed': 0, 'target_z_dot': 0.0 } self.envelopeBounds = { 'phiMax': 20, 'phiMin': -20, 'thetaMax_grad': 30, 'thetaMin_grad': -30, 'speedMax': 72, 'speedMin': 33 } self.observationErrorAkkumulation = np.zeros(3) self.integration_error_stepsize_ = 0 # fuer plotten self.plotter = PlotState() self.anzahlSteps = 0 self.anzahlEpisoden = 0 self.servo_command = 0 self.action_servo_command_history = np.zeros(2) self.bandbreite_servo_actions = 0 def reset(self): self.observationErrorAkkumulation = np.zeros(3) self.integration_error_stepsize_ = 0 self.servo_command = 0 self.action_servo_command_history = np.zeros(2) self.bandbreite_servo_actions = 0 self.anzahlSteps = 1 self.anzahlEpisoden += 1 # set targets for different angles self.targetValues['targetTheta'] = np.random.uniform(-5, 5) print('new Target (deg): ', self.targetValues) # set state at initial phi_as_random = np.deg2rad(np.random.uniform(0, 0)) theta_as_random = np.deg2rad(np.random.uniform(-3, 3)) self.aircraft.setState( np.array([ 40.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, phi_as_random, theta_as_random, 0.0 ])) observation = self.user_defined_observation(self.aircraft.getState()) return observation # reward, done, info can't be included def step(self, actionElevator): self.anzahlSteps += 1 # action im Intervall [-1,1] # mapping auf Begrenzung der Steuerflächen self.servo_command = actionElevator[0] set_elevator = np.interp( actionElevator[0], [-1, 1], self.aircraft.SteuerflaechenUndMotorStellung. controlLimits['deltaElevatorBorder']) # print(set_aileron) self.aircraft.SteuerflaechenUndMotorStellung.deltaElevator = set_elevator # Headline: phi wird mit PID-Regler stabilisiert set_aileron = self.pid._innerLoopAileron( 0, np.rad2deg(self.aircraft.phi), self.aircraft.p, self.aircraft.SteuerflaechenUndMotorStellung.deltaAileron) self.aircraft.SteuerflaechenUndMotorStellung.deltaAileron = set_aileron # Headline: integrate step solver = self.dynamicSystem.integrate( self.aircraft.getState(), self.aircraft.getForces(), self.aircraft.getMoments(), self.aircraft.mass, self.aircraft.inertia, self.stepweite ) # def integrate(self, state, mass, inertia, forces, moments, stepweite): # State1 self.aircraft.setState( np.array([ solver.y[0][0], solver.y[1][0], solver.y[2][0], solver.y[3][0], solver.y[4][0], solver.y[5][0], solver.y[6][0], solver.y[7][0], solver.y[8][0], solver.y[9][0], solver.y[10][0], solver.y[11][0] ])) observation = self.user_defined_observation(self.aircraft.getState()) reward = self.compute_reward(self.aircraft.getState()) done = self.check_done(self.aircraft.getState()) # Headline: ab hier für plotten self.plotter.addData( self.aircraft.getState(), self.aircraft.getForces(), self.aircraft.getMoments(), self.aircraft.alpha, self.aircraft.beta, np.rad2deg(self.aircraft.SteuerflaechenUndMotorStellung. getSteuerflaechenUndMotorStellung()), self.anzahlSteps + self.anzahlEpisoden * 400) #Headline ist anzupassen return observation, reward, done, {} def render(self, mode='human'): self.udpClient.send((struct.pack('fff', self.aircraft.phi, 0, 0))) def close(self): pass def seed(self, seed=None): return def user_defined_observation(self, observation): # return: current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error current_theta_dot = observation[7] current_theta = observation[10] error_current_theta_to_target_theta = np.abs( np.rad2deg(observation[10]) - self.targetValues['targetTheta']) self.observationErrorAkkumulation = np.roll( self.observationErrorAkkumulation, 1) self.observationErrorAkkumulation[ -1] = error_current_theta_to_target_theta self.integration_error_stepsize_ = np.add.reduce( self.observationErrorAkkumulation) self.action_servo_command_history = np.roll( self.action_servo_command_history, len(self.action_servo_command_history) - 1) self.action_servo_command_history[-1] = self.servo_command self.bandbreite_servo_actions = np.abs( np.min(self.action_servo_command_history) - np.max(self.action_servo_command_history)) # enrichment of Observation: akkumulated error -> 1. rotate and add element 2. reduce to skalar observation = np.asarray([ current_theta_dot, current_theta, error_current_theta_to_target_theta, self.integration_error_stepsize_, self.action_servo_command_history[0] ]) return observation def compute_reward(self, observation): reward = 0 # exceeds bounds [-20, 20] -> -100 if np.rad2deg(observation[10] ) > self.envelopeBounds['thetaMax'] or np.rad2deg( observation[10]) < self.envelopeBounds['thetaMin']: reward += -100 # Abweichung abs(target-current) > 1 -> -1 if np.abs( np.rad2deg(observation[10]) - self.targetValues['targetTheta']) > 1: reward += -1 # Abweichung abs(target-current) <= 1 -> 10 if np.abs( np.rad2deg(observation[10]) - self.targetValues['targetTheta']) <= 1: reward += 10 #reward += -0.01 * np.power((1 * self.action_servo_command_history[0] - 1 * self.action_servo_command_history[1]), 2) return reward def check_done(self, observation): done = 0 # conditions_if_reset = all( [30 <= observation[0] <= 50, 30 <= observation[1] <= 50]) if observation[0] < self.envelopeBounds['speedMin'] or observation[ 0] > self.envelopeBounds['speedMax']: print("speed limits", observation[0]) done = 1 # conditions_if_reset_speed = any([observation[0] < 30, observation[0] > 50]) if np.rad2deg(observation[9]) < self.envelopeBounds['phiMin'] or np.rad2deg(observation[9]) > \ self.envelopeBounds['phiMax']: print("roll limits", np.rad2deg(observation[9])) done = 1 # conditions_if_reset_phi = any([self.envelopeBounds['phiMin'] > np.rad2deg(observation[9]), np.rad2deg(observation[9]) > self.envelopeBounds['phiMax']]) if np.rad2deg(observation[10]) < self.envelopeBounds['thetaMin_grad'] or np.rad2deg(observation[10]) > \ self.envelopeBounds['thetaMax_grad']: print("pitch limits", np.rad2deg(observation[10])) done = 1 return done
class WrapperOpenAI (gym.Env): """Custom Environment that follows gym interface""" metadata = {'render.modes': ['human']} def __init__(self, stepweite=0.02): super(WrapperOpenAI, self).__init__() # Define action and observation space # They must be gym.spaces objects # nur aileron high_action_space = np.array([1.], dtype=np.float32) self.action_space = spaces.Box(low=-high_action_space, high=high_action_space, dtype=np.float32) # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error high_observation_space = np.array([np.inf, np.inf, np.inf, np.inf], dtype=np.float32) self.observation_space = spaces.Box(low=-high_observation_space, high=high_observation_space, dtype=np.float32) # reward self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32) # spezielle Parameter für das Enviroment FDM #self.aircraft = Aircraft(config.geometrieClass) # Ball oder C172 self.aircraft_beaver = Aircraft_baever() self.pid = PidRegler() self.dynamicSystem = DynamicSystem6DoF() self.umrechnungenKoordinaten = UmrechnungenKoordinaten() self.stepweite = stepweite self.udpClient = UdpClient('127.0.0.1', 5566) # frage: ist das die richtige Stelle, oder besser im DDPG-Controller self.targetValues = {'targetPhi_grad': 0, 'targetTheta_grad': 0, 'targetPsi': 0, 'targetSpeed': 0, 'target_z_dot': 0.0} self.envelopeBounds = {'phiMax_grad': 30, 'phiMin_grad': -30, 'thetaMax_grad': 30, 'thetaMin_grad': -30, 'speedMax': 72, 'speedMin': 33 } self.servo_command_elevator = 0 self.action_servo_command_history_elevator = np.zeros(2) self.bandbreite_servo_actions_elevator = 0 self.servo_command_aileron = 0 self.action_servo_command_history_aileron = np.zeros(2) self.bandbreite_servo_actions_aileron = 0 # fuer plotten self.plotter = PlotState() self.anzahlSteps = 0 self.anzahlEpisoden = 0 def reset(self): np.random.seed() self.servo_command_elevator = 0 self.action_servo_command_history_elevator = np.zeros(2) self.bandbreite_servo_actions_elevator = 0 self.servo_command_aileron = 0 self.action_servo_command_history_aileron = np.zeros(2) self.bandbreite_servo_actions_aileron = 0 self.anzahlSteps = 1 self.anzahlEpisoden += 1 # set targets self.targetValues['targetSpeed'] = np.random.uniform(40, 55) # m/s self.targetValues['targetTheta_grad'] = np.random.uniform(0, 0) # m/s self.targetValues['targetPhi_grad'] = np.random.uniform(0,0) # m/s self.targetValues['target_z_dot'] = np.random.uniform(0, 0) # m/s print('new Targets: ', self.targetValues) # set state at initial u_as_random = np.random.uniform(40, 55) phi_as_random = np.deg2rad(np.random.uniform(-0, 0)) theta_as_random = np.deg2rad(np.random.uniform(-10, 10)) self.aircraft_beaver.setState( np.array([u_as_random, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, phi_as_random, theta_as_random, 0.0])) observation = self.user_defined_observation(self.aircraft_beaver.getState(), self.aircraft_beaver.z_dot_g_ks) return observation # reward, done, info can't be included def step(self, action_command): self.servo_command_elevator = action_command[0] self.anzahlSteps += 1 self.aircraft_beaver.delta_elevator = np.deg2rad(np.clip(self.servo_command_elevator, -1, 1) * (20)) self.aircraft_beaver.delta_aileron = self.pid._innerLoopAileron(np.deg2rad(0), self.aircraft_beaver.phi, self.aircraft_beaver.p, self.aircraft_beaver.delta_aileron) # MultiAgent # MultiAgent self.servo_command_thrust = action_command[1] self.aircraft_beaver.delta_thrust = np.interp(self.servo_command_thrust, [-1, 1], [0.0, 1]) # Übersetzung der Ausgabe KNN zu Thrust-Setting # Headline: integrate step for x in range(10): solver = self.dynamicSystem.integrate(self.aircraft_beaver.getState(), self.aircraft_beaver.getForces(), self.aircraft_beaver.getMoments(), self.aircraft_beaver.mass, self.aircraft_beaver.inertia, self.stepweite) # def integrate(self, state, mass, inertia, forces, moments, stepweite): # State1 in f_ks self.aircraft_beaver.setState(np.array( [solver.y[0][0], solver.y[1][0], solver.y[2][0], solver.y[3][0], solver.y[4][0], solver.y[5][0], solver.y[6][0], solver.y[7][0], solver.y[8][0], solver.y[9][0], solver.y[10][0], solver.y[11][0]])) #set Values for g_ks self.aircraft_beaver.x_dot_g_ks, self.aircraft_beaver.y_dot_g_ks, self.aircraft_beaver.z_dot_g_ks = self.umrechnungenKoordinaten.flug2geo( [self.aircraft_beaver.u, self.aircraft_beaver.v, self.aircraft_beaver.w], self.aircraft_beaver.phi, self.aircraft_beaver.theta, self.aircraft_beaver.psi) observation = self.user_defined_observation(self.aircraft_beaver.getState(), self.aircraft_beaver.z_dot_g_ks) reward = self.compute_reward(self.aircraft_beaver.getState(), self.aircraft_beaver.z_dot_g_ks) done = self.check_done(self.aircraft_beaver.getState()) # Headline: ab hier für plotten self.plotter.addData(self.aircraft_beaver.getState(), self.aircraft_beaver.getForces(), self.aircraft_beaver.getMoments(), self.aircraft_beaver.alpha, self.aircraft_beaver.beta, np.rad2deg( self.aircraft_beaver.getSteuerflaechenUndMotorStellung()), self.anzahlSteps + self.anzahlEpisoden * 1000) # Headline ist anzupassen self.plotter.add_data_xyz([self.aircraft_beaver.x_geo, self.aircraft_beaver.y_geo, self.aircraft_beaver.z_geo], self.aircraft_beaver.z_dot_g_ks, self.anzahlSteps + self.anzahlEpisoden * 1000) self.plotter.add_data_Ziel(self.targetValues['targetSpeed'], self.anzahlSteps + self.anzahlEpisoden * 400) return observation, reward, done, {} def render(self, mode='human'): self.udpClient.send((struct.pack('fff', self.aircraft_beaver.phi, 0, 0))) def close(self): pass def seed(self, seed=None): return def user_defined_observation(self, aircraft_state_f_ks, z_dot_g_ks): current_u = aircraft_state_f_ks[0] error_current_u_to_target_u = current_u - self.targetValues['targetSpeed'] current_theta_grad = np.rad2deg(aircraft_state_f_ks[10]) user_defined_observation = np.asarray( [current_u, error_current_u_to_target_u, z_dot_g_ks, current_theta_grad]) return user_defined_observation def compute_reward(self, aircraft_state_f_ks, z_dot_g_ks): reward0 = self.reward_elevator(aircraft_state_f_ks, z_dot_g_ks) reward1 = self.reward_thrust(aircraft_state_f_ks) return reward0, reward1 def reward_elevator(self, aircraft_state_f_ks, z_dot_g_ks): current_theta_grad = np.rad2deg(aircraft_state_f_ks[10]) reward0 = 0 # out of bounds if current_theta_grad < self.envelopeBounds['thetaMin_grad'] or current_theta_grad > self.envelopeBounds[ 'thetaMax_grad']: reward0 += -1000 # Zielgröße Sinken/steigen if np.abs(self.targetValues['target_z_dot'] - z_dot_g_ks) > 1: reward0 += -1 else: reward0 += 100 return reward0 def reward_thrust(self, aircraft_state_f_ks): current_u = aircraft_state_f_ks[0] reward1 = 0 # out of bounds if current_u < self.envelopeBounds['speedMin'] or current_u > self.envelopeBounds[ 'speedMax']: reward1 += -1000 # Zielgröße Sinken/steigen if np.abs(self.targetValues['targetSpeed'] - current_u) > 1: reward1 += -1 else: reward1 += 100 return reward1 def check_done(self, observation): done = 0 # conditions_if_reset = all( [30 <= observation[0] <= 50, 30 <= observation[1] <= 50]) if observation[0] < self.envelopeBounds['speedMin'] or observation[0] > self.envelopeBounds['speedMax']: print("speed limits", observation[0]) done = 1 # conditions_if_reset_speed = any([observation[0] < 30, observation[0] > 50]) if np.rad2deg(observation[9]) < self.envelopeBounds['phiMin_grad'] or np.rad2deg(observation[9]) > \ self.envelopeBounds['phiMax_grad']: print("roll limits", np.rad2deg(observation[9])) done = 1 # conditions_if_reset_phi = any([self.envelopeBounds['phiMin'] > np.rad2deg(observation[9]), np.rad2deg(observation[9]) > self.envelopeBounds['phiMax']]) if np.rad2deg(observation[10]) < self.envelopeBounds['thetaMin_grad'] or np.rad2deg(observation[10]) > \ self.envelopeBounds['thetaMax_grad']: print("pitch limits", np.rad2deg(observation[10])) done = 1 return done
class WrapperOpenAI(gym.Env): """Custom Environment that follows gym interface""" metadata = {'render.modes': ['human']} def __init__(self, stepweite=0.02): super(WrapperOpenAI, self).__init__() # Define action and observation space # They must be gym.spaces objects # nur aileron high_action_space = np.array([1.], dtype=np.float32) self.action_space = spaces.Box(low=-high_action_space, high=high_action_space, dtype=np.float32) # Zustandsraum 4 current_phi_dot, current_phi, abs(error_current_phi_target_phi), integration_error high_observation_space = np.array([np.inf, np.inf], dtype=np.float32) self.observation_space = spaces.Box(low=-high_observation_space, high=high_observation_space, dtype=np.float32) # reward self.reward_range = np.array([-np.inf, np.inf], dtype=np.float32) # spezielle Parameter für das Enviroment FDM self.aircraft_beaver = Aircraft_baever() self.pid = PidRegler() self.dynamicSystem = DynamicSystem6DoF() self.stepweite = stepweite self.udpClient = UdpClient('127.0.0.1', 5566) # frage: ist das die richtige Stelle, oder besser im DDPG-Controller self.targetValues = { 'targetPhi': 0, 'targetTheta': 0, 'targetPsi': 0, 'targetSpeed': 0 } self.envelopeBounds = { 'phiMax': 20, 'phiMin': -20, 'thetaMax': 30, 'thetaMin': -30, 'speedMax': 72, 'speedMin': 30 } self.observationErrorAkkumulation = np.zeros(3) self.integration_error_stepsize_ = 0 # fuer plotten self.plotter = PlotState() self.anzahlSteps = 0 self.anzahlEpisoden = 0 self.servo_command = 0 self.action_servo_command_history = np.zeros(2) self.bandbreite_servo_actions = 0 def reset(self): self.observationErrorAkkumulation = np.zeros(3) self.integration_error_stepsize_ = 0 self.servo_command = 0 self.action_servo_command_history = np.zeros(2) self.bandbreite_servo_actions = 0 self.anzahlSteps = 1 self.anzahlEpisoden += 1 # set targets self.targetValues['targetSpeed'] = np.random.uniform(47, 63) print('new Target: ', self.targetValues) # set state at initial phi_as_random = np.deg2rad(np.random.uniform(0, 0)) theta_as_random = np.deg2rad(np.random.uniform(2.5, 3.5)) self.aircraft_beaver.setState( np.array([ 40.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, phi_as_random, theta_as_random, 0.0 ])) observation = self.user_defined_observation( self.aircraft_beaver.getState()) return observation # reward, done, info can't be included def step(self, actionThrust): self.anzahlSteps += 1 # action im Intervall [-1,1] # mapping auf Begrenzung der Steuerflächen self.servo_command = actionThrust[0] self.aircraft_beaver.delta_thrust = np.interp( actionThrust[0], [-1, 1], [0.0, 1]) # Übersetzung der Ausgabe KNN zu Thrust-Setting # Headline: theta wird mit PID-Regler stabilisiert self.aircraft_beaver.delta_elevator = self.pid._innerLoopElevator( np.deg2rad(0), self.aircraft_beaver.theta, self.aircraft_beaver.q, self.aircraft_beaver.delta_elevator) # Headline: phi wird mit PID-Regler stabilisiert self.aircraft_beaver.delta_aileron = self.pid._innerLoopAileron( np.deg2rad(0), self.aircraft_beaver.phi, self.aircraft_beaver.p, self.aircraft_beaver.delta_aileron) # Headline: integrate step solver = self.dynamicSystem.integrate( self.aircraft_beaver.getState(), self.aircraft_beaver.getForces(), self.aircraft_beaver.getMoments(), self.aircraft_beaver.mass, self.aircraft_beaver.inertia, self.stepweite ) # def integrate(self, state, mass, inertia, forces, moments, stepweite): # State1 self.aircraft_beaver.setState( np.array([ solver.y[0][0], solver.y[1][0], solver.y[2][0], solver.y[3][0], solver.y[4][0], solver.y[5][0], solver.y[6][0], solver.y[7][0], solver.y[8][0], solver.y[9][0], solver.y[10][0], solver.y[11][0] ])) observation = self.user_defined_observation( self.aircraft_beaver.getState()) reward = self.compute_reward(self.aircraft_beaver.getState()) done = self.check_done(self.aircraft_beaver.getState()) # Headline: ab hier für plotten self.plotter.addData( self.aircraft_beaver.getState(), self.aircraft_beaver.getForces(), self.aircraft_beaver.getMoments(), self.aircraft_beaver.alpha, self.aircraft_beaver.beta, (self.aircraft_beaver.getSteuerflaechenUndMotorStellung()), self.anzahlSteps + self.anzahlEpisoden * 1000) #Headline ist anzupassen return observation, reward, done, {} def render(self, mode='human'): self.udpClient.send((struct.pack('fff', self.aircraft_beaver.phi, 0, 0))) def close(self): pass def seed(self, seed=None): return def user_defined_observation(self, aircraft_state_f_ks): current_speed = aircraft_state_f_ks[0] error_current_speed_to_target_speed = aircraft_state_f_ks[ 0] - self.targetValues['targetSpeed'] self.action_servo_command_history = np.roll( self.action_servo_command_history, len(self.action_servo_command_history) - 1) self.action_servo_command_history[-1] = self.servo_command self.bandbreite_servo_actions = np.abs( np.min(self.action_servo_command_history) - np.max(self.action_servo_command_history)) aircraft_state_f_ks = np.asarray( [current_speed, error_current_speed_to_target_speed]) return aircraft_state_f_ks def compute_reward(self, aircraft_state_f_ks): current_u = aircraft_state_f_ks[0] reward1 = 0 if current_u < self.envelopeBounds[ 'speedMin'] or current_u > self.envelopeBounds['speedMax']: reward1 += -1000 if np.abs(current_u - self.targetValues['targetSpeed']) > 1: reward1 += -1 else: reward1 += 10 return reward1 def check_done(self, observation): done = 0 #conditions_if_reset = all( [30 <= observation[0] <= 50, 30 <= observation[1] <= 50]) if observation[0] < 30 or observation[0] > 80: print("speed limits") done = 1 #conditions_if_reset_speed = any([observation[0] < 30, observation[0] > 50]) if self.envelopeBounds['phiMin'] > np.rad2deg( observation[9]) or np.rad2deg( observation[9]) > self.envelopeBounds['phiMax']: print("roll limits", np.rad2deg(observation[9])) done = 1 # conditions_if_reset_phi = any([self.envelopeBounds['phiMin'] > np.rad2deg(observation[9]), np.rad2deg(observation[9]) > self.envelopeBounds['phiMax']]) if self.envelopeBounds['thetaMin'] > np.rad2deg( observation[10]) or np.rad2deg( observation[10]) > self.envelopeBounds['thetaMax']: print("pitch limits", np.rad2deg(observation[10])) done = 1 return done
from FDM.servicesFDM.umrechnungen_koordinaten import UmrechnungenKoordinaten from FDM.servicesFDM.plot_state import PlotState from FDM.physikFDM.dynamic_system6DoF import * from FDM.config import * from FDM.servicesFDM.udp_client import UdpClient from FDM.physikFDM.aircraft_beaver import Aircraft_baever config = Config() umrechnungenKoordinaten = UmrechnungenKoordinaten() plotter = PlotState() # todo: config anpassen #aircraft = Aircraft(config.geometrieClass) # Ball oder C172 aircraft = Aircraft_baever() pid = PidRegler() dynamicSystem = DynamicSystem6DoF() udpClient = UdpClient('127.0.0.1', 5566) #zeitmessung für develop import time summe = 0 start = time.time() #zeitmessung für develop #todo: echtzeitanpassung über time und pause... stepweite = 0.02 steps = 2000 # set initial conditions of a/c aircraft.setState([40., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) for step in range(steps):