class JsbSimEnv(gym.Env):
    """
    An OpenAI Gym environment backed by the JSBSim flight dynamics module (FDM).

    Task-specific behaviour (observation/action spaces, reward calculation,
    episode termination) is delegated to a Task instance built from the
    ``task_type`` given to the constructor; this class owns the underlying
    Simulation and any visualisers and provides the Gym Env plumbing.

    ATTRIBUTION: this class implements the OpenAI Gym Env API. Method
    docstrings have been adapted or copied from the OpenAI Gym source code.
    """
    JSBSIM_DT_HZ: int = 60  # JSBSim integration frequency
    metadata = {'render.modes': ['human', 'flightgear']}

    def __init__(self, task_type: Type[HeadingControlTask], aircraft: Aircraft = cessna172P,
                 agent_interaction_freq: int = 5, shaping: Shaping = Shaping.STANDARD):
        """
        Initialises internal state. JsbSimEnv.reset() must be called before
        interacting with the environment.

        :param task_type: the Task subclass for the task agent is to perform
        :param aircraft: the JSBSim aircraft to be used
        :param agent_interaction_freq: int, how many times per second the agent
            should interact with environment.
        :param shaping: a HeadingControlTask.Shaping enum, what type of
            agent_reward shaping to use (see HeadingControlTask for options)
        """
        if agent_interaction_freq > self.JSBSIM_DT_HZ:
            raise ValueError(f'agent interaction frequency must be less than '
                             f'or equal to JSBSim integration frequency of '
                             f'{self.JSBSIM_DT_HZ} Hz.')
        self.sim: Simulation = None
        self.sim_steps_per_agent_step: int = self.JSBSIM_DT_HZ // agent_interaction_freq
        self.aircraft = aircraft
        self.task = task_type(shaping, agent_interaction_freq, aircraft)
        # Gym Space objects are supplied by the task
        self.observation_space: gym.spaces.Box = self.task.get_state_space()
        self.action_space: gym.spaces.Box = self.task.get_action_space()
        # visualisers are created lazily on first render()
        self.figure_visualiser: FigureVisualiser = None
        self.flightgear_visualiser: FlightGearVisualiser = None
        self.step_delay = None

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict]:
        """
        Runs one timestep of the environment's dynamics.

        When the end of an episode is reached, the caller is responsible for
        calling reset() before stepping again.

        :param action: the agent's action, with same length as action variables.
        :return: tuple (state, reward, done, info) where state is the agent's
            observation of the environment, reward is the scalar reward for
            the previous action, done indicates the episode has ended (further
            step() calls are undefined), and info holds auxiliary data such as
            full reward shaping data
        """
        if action.shape != self.action_space.shape:
            raise ValueError('mismatch between action and action space size')
        state, reward, done, info = self.task.task_step(self.sim, action,
                                                        self.sim_steps_per_agent_step)
        return np.array(state), reward, done, info

    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.

        :return: array, the initial observation of the space.
        """
        init_conditions = self.task.get_initial_conditions()
        if self.sim:
            # reuse the existing simulation instead of building a new one
            self.sim.reinitialise(init_conditions)
        else:
            self.sim = self._init_new_sim(self.JSBSIM_DT_HZ, self.aircraft,
                                          init_conditions)
        state = self.task.observe_first_state(self.sim)
        if self.flightgear_visualiser:
            self.flightgear_visualiser.configure_simulation_output(self.sim)
        return np.array(state)

    def _init_new_sim(self, dt, aircraft, initial_conditions):
        # builds the Simulation wrapped by this environment
        return Simulation(sim_frequency_hz=dt,
                          aircraft=aircraft,
                          init_conditions=initial_conditions)

    def render(self, mode='flightgear', flightgear_blocking=True):
        """Renders the environment.

        Supported modes (declared in metadata['render.modes']):

        - human: plot simulation state via a FigureVisualiser.
        - flightgear: stream simulation output to FlightGear via a
          FlightGearVisualiser.

        Any other mode is deferred to gym.Env.render().

        :param mode: str, the mode to render with
        :param flightgear_blocking: waits for FlightGear to load before
            returning if True, else returns immediately
        """
        if mode == 'human':
            if not self.figure_visualiser:
                self.figure_visualiser = FigureVisualiser(
                    self.sim, self.task.get_props_to_output())
            self.figure_visualiser.plot(self.sim)
        elif mode == 'flightgear':
            if not self.flightgear_visualiser:
                self.flightgear_visualiser = FlightGearVisualiser(
                    self.sim, self.task.get_props_to_output(), flightgear_blocking)
            self.flightgear_visualiser.plot(self.sim)
        else:
            super().render(mode=mode)

    def close(self):
        """
        Cleans up this environment's objects.

        Environments automatically close() when garbage collected or when the
        program exits.
        """
        for resource in (self.sim, self.figure_visualiser, self.flightgear_visualiser):
            if resource:
                resource.close()

    def seed(self, seed=None):
        """
        Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        # seeding is not implemented for this environment
        gym.logger.warn("Could not seed environment %s", self)
        return
class TestFlightGearVisualiser(unittest.TestCase):
    """Integration tests for FlightGearVisualiser against a live JsbSimEnv."""
    env = None
    sim = None
    flightgear = None

    def setUp(self):
        """Creates a fresh env/sim pair, closing any left over from a prior test."""
        if self.env:
            self.env.close()
        if self.sim:
            self.sim.close()
        self.task = BasicFlightTask()
        self.env = JsbSimEnv(task_type=BasicFlightTask)
        self.env.reset()
        self.sim = self.env.sim
        self.flightgear = None
        # individual test methods should init as needed:
        # self.flightgear = FlightGearVisualiser(self.sim)

    def tearDown(self):
        """Closes the env, sim and any FlightGear process a test created."""
        if self.env:
            self.env.close()
        if self.sim:
            self.sim.close()
        if self.flightgear:
            self.flightgear.close()

    def test_init_creates_figure(self):
        self.flightgear = FlightGearVisualiser(self.sim,
                                               self.task.get_props_to_output(),
                                               block_until_loaded=False)
        self.assertIsInstance(self.flightgear.figure, FigureVisualiser)

    def test_launch_flightgear(self):
        self.flightgear = FlightGearVisualiser(self.sim,
                                               self.task.get_props_to_output(),
                                               block_until_loaded=False)
        time.sleep(0.5)
        # check FlightGear has launched by looking at stdout
        # NOTE(review): readline() blocks if FlightGear emits nothing — confirm
        # the process always prints a banner line
        self.assertIn(
            'FlightGear',
            self.flightgear.flightgear_process.stdout.readline().decode())
        self.flightgear.close()

    def test_close_closes_flightgear(self):
        self.flightgear = FlightGearVisualiser(self.sim,
                                               self.task.get_props_to_output(),
                                               block_until_loaded=False)
        self.flightgear.close()
        timeout_seconds = 2.0
        return_code = self.flightgear.flightgear_process.wait(
            timeout=timeout_seconds)
        # a non-None return code indicates termination
        self.assertIsNotNone(return_code)

    def test_plot_displays_actions(self):
        # FIX: removed the explicit self.setUp() call that was here; unittest
        # already runs setUp() before every test, so the extra call discarded
        # and rebuilt the freshly created env and sim.
        self.flightgear = FlightGearVisualiser(self.sim,
                                               self.task.get_props_to_output(),
                                               block_until_loaded=False)
        self.flightgear.plot(self.sim)
        # the figure should have plotted a Lines object on each axis
        for axis in ['axes_stick', 'axes_rudder', 'axes_throttle']:
            axis_data_plots = getattr(self.flightgear.figure.axes, axis)
            is_empty_plot = len(axis_data_plots.axes.lines) == 0
            self.assertFalse(is_empty_plot,
                             msg=f'no data plotted on axis {axis}')
class JsbSimEnv(gym.Env):
    """
    A class wrapping the JSBSim flight dynamics module (FDM) for simulating
    aircraft as an RL environment conforming to the OpenAI Gym Env interface.

    An JsbSimEnv is instantiated with a Task that implements a specific
    aircraft control task with its own specific observation/action space and
    variables and agent_reward calculation. This variant can additionally
    stream the full simulation state to an AWS SQS queue in 'human' render
    mode (best-effort: disabled if no queue is configured).

    ATTRIBUTION: this class implements the OpenAI Gym Env API. Method
    docstrings have been adapted or copied from the OpenAI Gym source code.
    """
    JSBSIM_DT_HZ: int = 60  # JSBSim integration frequency
    metadata = {'render.modes': ['human', 'flightgear']}

    def __init__(self, task_type: Type[BaseFlightTask],
                 aircraft: Aircraft = cessna172P,
                 agent_interaction_freq: int = 5):
        """
        Constructor. Inits some internal state, but JsbSimEnv.reset() must be
        called first before interacting with environment.

        :param task_type: the Task subclass for the task agent is to perform
        :param aircraft: the JSBSim aircraft to be used
        :param agent_interaction_freq: int, how many times per second the agent
            should interact with environment.
        """
        if agent_interaction_freq > self.JSBSIM_DT_HZ:
            raise ValueError('agent interaction frequency must be less than '
                             'or equal to JSBSim integration frequency of '
                             f'{self.JSBSIM_DT_HZ} Hz.')
        self.sim: Simulation = None
        self.sim_steps_per_agent_step: int = self.JSBSIM_DT_HZ // agent_interaction_freq
        self.aircraft = aircraft
        self.task = task_type(agent_interaction_freq, aircraft)
        # set Space objects
        self.observation_space: gym.spaces.Box = self.task.get_state_space()
        self.action_space: gym.spaces.Box = self.task.get_action_space()
        # set visualisation objects
        self.figure_visualiser: FigureVisualiser = None
        self.flightgear_visualiser: FlightGearVisualiser = None
        self.step_delay = None
        # Best-effort SQS setup: SQS rendering is optional, so any failure
        # (missing config file, no AWS credentials, etc.) degrades to
        # local-only operation instead of crashing the environment.
        try:
            with open('/home/jsbsim/sqs_url.conf', 'r') as file:
                # FIX: strip the trailing newline readline() leaves on the URL
                self._sqs_url = file.readline().strip()
            sqs = boto3.resource('sqs')
            self._l2f_queue = sqs.Queue(self._sqs_url)
            self._NUM_THREADS = 100
            self._pool = ThreadPool(self._NUM_THREADS)
        except Exception:
            self._sqs_url = None
            self._l2f_queue = None
            self._pool = None

    def step(self, action: np.ndarray) -> Tuple[np.ndarray, float, bool, Dict]:
        """
        Run one timestep of the environment's dynamics. When end of episode
        is reached, you are responsible for calling `reset()` to reset this
        environment's state. Accepts an action and returns a tuple
        (observation, reward, done, info).

        :param action: the agent's action, with same length as action variables.
        :return:
            state: agent's observation of the current environment
            reward: amount of reward returned after previous action
            done: whether the episode has ended, in which case further step()
                calls are undefined
            info: auxiliary information
        """
        if action.shape != self.action_space.shape:
            raise ValueError('mismatch between action and action space size')
        state, reward, done, info = self.task.task_step(
            self.sim, action, self.sim_steps_per_agent_step)
        return np.array(state), reward, done, info

    def reset(self):
        """
        Resets the state of the environment and returns an initial observation.

        :return: array, the initial observation of the space.
        """
        init_conditions = self.task.get_initial_conditions()
        if self.sim:
            self.sim.reinitialise(init_conditions)
        else:
            self.sim = self._init_new_sim(self.JSBSIM_DT_HZ, self.aircraft,
                                          init_conditions)
        state = self.task.observe_first_state(self.sim)
        if self.flightgear_visualiser:
            self.flightgear_visualiser.configure_simulation_output(self.sim)
        if self._pool:
            # FIX: terminate() must precede join() — Pool.join() raises
            # ValueError while the pool is still in the running state.
            self._pool.terminate()
            self._pool.join()
            self._pool = ThreadPool(self._NUM_THREADS)
        return np.array(state)

    def _init_new_sim(self, dt, aircraft, initial_conditions):
        # builds the Simulation wrapped by this environment
        return Simulation(sim_frequency_hz=dt,
                          aircraft=aircraft,
                          init_conditions=initial_conditions)

    def render(self, mode='flightgear', flightgear_blocking=True):
        """Renders the environment.

        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.) By convention, if
        mode is:

        - human: Send the state to AWS SQS.
        - flightgear: stream simulation output to FlightGear.

        Note:
            Make sure that your class's metadata 'render.modes' key includes
            the list of supported modes. It's recommended to call super()
            in implementations to use the functionality of this method.

        :param mode: str, the mode to render with
        :param flightgear_blocking: waits for FlightGear to load before
            returning if True, else returns immediately
        """
        if mode == 'human':
            self._send_state_to_sqs()
        elif mode == 'flightgear':
            if not self.flightgear_visualiser:
                self.flightgear_visualiser = FlightGearVisualiser(
                    self.sim,
                    self.task.get_props_to_output(self.sim),
                    flightgear_blocking)
            self.flightgear_visualiser.plot(self.sim)
        else:
            super().render(mode=mode)

    def close(self):
        """
        Cleans up this environment's objects

        Environments automatically close() when garbage collected or when the
        program exits.
        """
        if self.sim:
            self.sim.close()
        if self.figure_visualiser:
            self.figure_visualiser.close()
        if self.flightgear_visualiser:
            self.flightgear_visualiser.close()

    def seed(self, seed=None):
        """
        Sets the seed for this env's random number generator(s).

        Note:
            Some environments use multiple pseudorandom number generators.
            We want to capture all such seeds used in order to ensure that
            there aren't accidental correlations between multiple generators.

        Returns:
            list<bigint>: Returns the list of seeds used in this env's random
              number generators. The first value in the list should be the
              "main" seed, or the value which a reproducer should pass to
              'seed'. Often, the main seed equals the provided 'seed', but
              this won't be true if seed=None, for example.
        """
        # seeding is not implemented for this environment
        gym.logger.warn("Could not seed environment %s", self)
        return

    def _get_full_state(self):
        """Returns a dict mapping every task property name to its sim value."""
        state = {prop.name: self.sim[prop] for prop in self.task.all_props}
        state['epochtime'] = time.time()  # required to sort queue
        return state

    def _send_state_to_sqs(self):
        """Asynchronously sends the current full state to the SQS queue."""
        if self._l2f_queue:
            # FIX: call _get_full_state() — passing the bound method object
            # itself to json.dumps raised TypeError (method is not serializable)
            message_body = json.dumps(self._get_full_state())
            self._pool.apply_async(self._l2f_queue.send_message, kwds=dict(
                MessageBody=message_body,
                MessageGroupId='state_action_history'))
        else:
            import warnings
            warnings.warn('No SQS queue available.')