class GymTask(Task):
    """Gym task."""

    def __init__(self, skill_context: SkillContext, nb_steps: int = DEFAULT_NB_STEPS):
        """Initialize the task."""
        super().__init__(logger=skill_context.logger)
        self.logger.debug("GymTask.__init__: arguments: nb_steps={}".format(nb_steps))
        self._rl_agent = MyRLAgent(NB_GOODS, self.logger)
        self._proxy_env = ProxyEnv(skill_context)
        self.nb_steps = nb_steps
        self._rl_agent_training_thread = Thread(
            target=self._fit, args=[self._proxy_env, self.nb_steps]
        )
        self.is_rl_agent_training = False

    def _fit(self, proxy_env: ProxyEnv, nb_steps: int):
        """Fit the RL agent."""
        self._rl_agent.fit(proxy_env, nb_steps)
        self.logger.info("Training finished. You can exit now via CTRL+C.")

    @property
    def proxy_env(self) -> ProxyEnv:
        """Get the proxy environment."""
        return self._proxy_env

    @property
    def proxy_env_queue(self) -> Queue:
        """Get the queue."""
        return self._proxy_env.queue

    def setup(self) -> None:
        """Set up the task."""
        self.logger.info("Gym task: setup method called.")

    def execute(self, *args, **kwargs) -> None:
        """Execute the task."""
        if not self._proxy_env.is_rl_agent_trained and not self.is_rl_agent_training:
            self._start_training()
        if self._proxy_env.is_rl_agent_trained and self.is_rl_agent_training:
            self._stop_training()

    def teardown(self) -> None:
        """Teardown the task."""
        self.logger.info("Gym Task: teardown method called.")
        if self.is_rl_agent_training:
            self._stop_training()

    def _start_training(self) -> None:
        """Start training the RL agent."""
        self.logger.info("Training starting ...")
        self.is_rl_agent_training = True
        self._rl_agent_training_thread.start()

    def _stop_training(self) -> None:
        """Stop training the RL agent."""
        self.is_rl_agent_training = False
        self._proxy_env.close()
        self._rl_agent_training_thread.join()
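The training thread is the key detail here: `execute` is called periodically from the agent's main loop and must not block, so the fit runs on a background `Thread` and is joined in `_stop_training` once the proxy environment reports the agent as trained. The snippet below is a minimal, framework-free sketch of that same start/poll/stop pattern; `DummyTrainer`, its flag names and step counts are illustrative stand-ins, not part of the skill.

import time
from threading import Thread


class DummyTrainer:
    """Illustrative stand-in mirroring GymTask's non-blocking training pattern."""

    def __init__(self, nb_steps: int) -> None:
        self.nb_steps = nb_steps
        self.is_training = False
        self.is_trained = False
        self._thread = Thread(target=self._fit)

    def _fit(self) -> None:
        # Stands in for the RL agent's fit(): runs off the main loop.
        for _ in range(self.nb_steps):
            time.sleep(0.01)
        self.is_trained = True

    def execute(self) -> None:
        # Called repeatedly by a main loop; never blocks.
        if not self.is_trained and not self.is_training:
            self.is_training = True
            self._thread.start()
        if self.is_trained and self.is_training:
            self.is_training = False
            self._thread.join()


trainer = DummyTrainer(nb_steps=100)
while not trainer.is_trained or trainer.is_training:
    trainer.execute()  # the main loop keeps ticking while training proceeds
    time.sleep(0.05)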
def fit(self, proxy_env: ProxyEnv, nb_steps: int) -> None:
    """
    Train the agent on the given proxy environment.

    :param proxy_env: the proxy gym environment
    :param nb_steps: number of training steps to be performed.

    :return: None
    """
    action_counter = 0
    proxy_env.reset()
    while action_counter < nb_steps:
        action = self._pick_an_action()
        obs, reward, done, info = proxy_env.step(action)
        self._update_model(obs, reward, done, info, action)
        action_counter += 1
        if action_counter % 10 == 0:
            self.logger.info(
                "Action: step_id='{}' action='{}' reward='{}'".format(
                    action_counter, action, reward
                )
            )
    proxy_env.close()
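Note that `fit` drives the proxy environment through the classic gym `reset`/`step` interface and only relies on two hooks of the agent: `_pick_an_action` and `_update_model`. A minimal sketch of what such hooks could look like, assuming a simple epsilon-greedy bandit over a fixed discrete action set (the class name `EpsilonGreedyAgent` and its internals are illustrative assumptions, not the skill's actual `MyRLAgent`):

import random
from typing import Any, Dict, List


class EpsilonGreedyAgent:
    """Illustrative agent exposing the two hooks fit() relies on."""

    def __init__(self, nb_actions: int, epsilon: float = 0.1) -> None:
        self.epsilon = epsilon
        self.counts: List[int] = [0] * nb_actions
        self.values: List[float] = [0.0] * nb_actions

    def _pick_an_action(self) -> int:
        # Explore with probability epsilon, otherwise exploit the best estimate.
        if random.random() < self.epsilon:
            return random.randrange(len(self.values))
        return max(range(len(self.values)), key=self.values.__getitem__)

    def _update_model(
        self, obs: Any, reward: float, done: bool, info: Dict, action: int
    ) -> None:
        # Incremental mean update of the chosen action's value estimate.
        self.counts[action] += 1
        self.values[action] += (reward - self.values[action]) / self.counts[action]

Any agent exposing these two hooks (and a compatible environment answering `reset`, `step` and `close`) could be trained by the same `fit` loop.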