コード例 #1
0
    def _try_save(self, agent: AgentABC):
        """Save ``agent`` when its latest mean test reward warrants it.

        Flushes the agent logger using ``self.agent_logger_cid`` and averages
        the ``"test_episode_total_reward"`` values found in the first flushed
        element. ``self.best_score`` is raised whenever that mean strictly
        exceeds it. The agent is passed to ``self._save_agent`` if the score
        improved, or unconditionally when ``self.save_best_only`` is falsy.

        Args:
            agent: Agent instance handed on to ``self._save_agent``.
        """
        flushed = agent_logger.flush(self.agent_logger_cid)
        score = np.mean(flushed[0]["test_episode_total_reward"])

        improved = score > self.best_score
        if improved:
            # Record the new best score before the agent is saved.
            self.best_score = score

        if improved or not self.save_best_only:
            self._save_agent(agent, score)
コード例 #2
0
 def on_iteration_end(self, agent: AgentABC):
     """Report whether the mean test reward has reached the target.

     Flushes the agent logger using ``self.agent_logger_cid``, averages the
     ``"test_episode_total_reward"`` values of the first flushed element and
     compares the result with ``self.target_reward`` (inclusive). When the
     target is reached and ``self.verbose`` is truthy, a message with the
     agent's ``iteration_count`` is printed.

     Args:
         agent: Agent whose ``iteration_count`` is shown in the message.

     Returns:
         The result of ``mean_reward >= self.target_reward``; truthy when
         the target has been reached.
     """
     flushed = agent_logger.flush(self.agent_logger_cid)
     score = np.mean(flushed[0]["test_episode_total_reward"])
     target_reached = score >= self.target_reward

     if not target_reached:
         return target_reached

     if self.verbose:
         message = (
             "Early stopping in iteration_number %s. "
             "Achieved mean raw reward of %.4f (target was %.4f)" %
             (agent.iteration_count, score, self.target_reward))
         print(message)
     return target_reached
コード例 #3
0
    def on_iteration_end(self, agent: AgentABC):
        """Log the mean test reward and mean test episode length.

        Flushes the agent logger using ``self.agent_logger_cid`` and, from
        the first flushed element, averages ``"test_episode_total_reward"``
        and ``"test_episode_length"``. The resulting
        ``(mean_reward, mean_length)`` pair is forwarded, together with the
        agent's ``iteration_count``, to ``self._log_on_screen`` when
        ``self.on_screen`` is truthy and to ``self._log_to_file`` when
        ``self.to_file`` is truthy.

        Args:
            agent: Agent whose ``iteration_count`` labels the log entry.
        """
        first_entry = agent_logger.flush(self.agent_logger_cid)[0]

        reward_mean = np.mean(first_entry["test_episode_total_reward"])
        length_mean = np.mean(first_entry["test_episode_length"])
        summary = (reward_mean, length_mean)

        # The two sinks are independent: either, both or neither may run.
        if self.on_screen:
            self._log_on_screen(agent.iteration_count, summary)
        if self.to_file:
            self._log_to_file(agent.iteration_count, summary)
コード例 #4
0
    def on_iteration_end(self, agent: AgentABC):
        """Summarise freshly flushed agent and NN logs and emit them.

        Flushes the agent logger (``self.agent_logger_cid``) and the NN
        logger (``self.nn_logger_cid``), passes both results to
        ``self._calculate_summaries`` and unpacks its return value as extra
        positional arguments to ``self._log_on_screen`` and/or
        ``self._log_to_file``, depending on ``self.on_screen`` and
        ``self.to_file``.

        Args:
            agent: Agent whose ``iteration_count`` is the first argument
                given to each logging method.
        """
        agent_records = agent_logger.flush(self.agent_logger_cid)
        nn_records = nn_logger.flush(self.nn_logger_cid)
        summaries = self._calculate_summaries(agent_records, nn_records)

        # The two sinks are independent: either, both or neither may run.
        if self.on_screen:
            self._log_on_screen(agent.iteration_count, *summaries)
        if self.to_file:
            self._log_to_file(agent.iteration_count, *summaries)