예제 #1
0
class GymTask(Task):
    """Task that trains an RL agent against a proxy gym environment.

    The training loop runs on a background thread; ``execute`` starts it when
    needed and stops it once the proxy environment reports the agent as
    trained.
    """
    def __init__(self,
                 skill_context: SkillContext,
                 nb_steps: int = DEFAULT_NB_STEPS):
        """Initialize the task.

        :param skill_context: the skill context; supplies the logger and is
            passed on to the proxy environment.
        :param nb_steps: number of training steps handed to the RL agent.
        """
        super().__init__(logger=skill_context.logger)
        self.logger.debug(
            "GymTask.__init__: arguments: nb_steps={}".format(nb_steps))
        self._rl_agent = MyRLAgent(NB_GOODS, self.logger)
        self._proxy_env = ProxyEnv(skill_context)
        self.nb_steps = nb_steps
        self._rl_agent_training_thread = self._new_training_thread()
        # True between _start_training() and _stop_training().
        self.is_rl_agent_training = False

    def _new_training_thread(self) -> Thread:
        """Build a (not yet started) thread that runs the training loop."""
        return Thread(
            target=self._fit, args=[self._proxy_env, self.nb_steps])

    def _fit(self, proxy_env: ProxyEnv, nb_steps: int) -> None:
        """Fit the RL agent (target of the training thread).

        :param proxy_env: the proxy environment the agent trains on.
        :param nb_steps: number of training steps.
        """
        self._rl_agent.fit(proxy_env, nb_steps)
        self.logger.info("Training finished. You can exit now via CTRL+C.")

    @property
    def proxy_env(self) -> ProxyEnv:
        """Get the proxy environment."""
        return self._proxy_env

    @property
    def proxy_env_queue(self) -> Queue:
        """Get the queue of the proxy environment."""
        return self._proxy_env.queue

    def setup(self) -> None:
        """Set up the task."""
        self.logger.info("Gym task: setup method called.")

    def execute(self, *args, **kwargs) -> None:
        """Execute the task.

        Starts training if the proxy environment does not report the agent
        as trained and no training is in progress; stops training if the
        agent is reported as trained while training is still flagged as
        running. Positional and keyword arguments are ignored.
        """
        if not self._proxy_env.is_rl_agent_trained and not self.is_rl_agent_training:
            self._start_training()
        if self._proxy_env.is_rl_agent_trained and self.is_rl_agent_training:
            self._stop_training()

    def teardown(self) -> None:
        """Teardown the task, stopping training if it is in progress."""
        self.logger.info("Gym Task: teardown method called.")
        if self.is_rl_agent_training:
            self._stop_training()

    def _start_training(self) -> None:
        """Start training the RL agent on the background thread."""
        self.logger.info("Training starting ...")
        # A threading.Thread can be started at most once (a second start()
        # raises RuntimeError). ``ident`` is None only for a thread that was
        # never started, so replace a used thread with a fresh one.
        if self._rl_agent_training_thread.ident is not None:
            self._rl_agent_training_thread = self._new_training_thread()
        self.is_rl_agent_training = True
        self._rl_agent_training_thread.start()

    def _stop_training(self) -> None:
        """Stop training the RL agent and wait for the thread to finish."""
        self.is_rl_agent_training = False
        # NOTE(review): closing the proxy env is presumably what lets a
        # still-running fit() return so join() does not block - confirm
        # against ProxyEnv.close().
        self._proxy_env.close()
        self._rl_agent_training_thread.join()
예제 #2
0
    def fit(self, proxy_env: ProxyEnv, nb_steps: int) -> None:
        """
        Run the training loop against the proxy environment.

        The environment is reset once, then for each of the ``nb_steps``
        steps an action is picked, applied to the environment, and the
        outcome is fed to the model. Every tenth step is logged. The
        environment is closed after the last step.

        :param proxy_env: the proxy gym environment
        :param nb_steps: number of training steps to be performed.
        :return: None
        """
        log_template = "Action: step_id='{}' action='{}' reward='{}'"

        proxy_env.reset()
        # Step ids are 1-based so the id equals the number of actions taken.
        for step_id in range(1, nb_steps + 1):
            chosen = self._pick_an_action()
            obs, reward, done, info = proxy_env.step(chosen)
            self._update_model(obs, reward, done, info, chosen)
            if step_id % 10 != 0:
                continue
            self.logger.info(log_template.format(step_id, chosen, reward))
        proxy_env.close()