def _try_save(self, agent: AgentABC):
    """Persist *agent* when it beats the best score seen so far.

    When ``save_best_only`` is False the agent is saved every call;
    ``best_score`` is still only advanced on a genuine improvement.
    """
    logs = agent_logger.flush(self.agent_logger_cid)
    score = np.mean(logs[0]["test_episode_total_reward"])
    improved = score > self.best_score
    # Guard clause: nothing to do unless we improved or save unconditionally.
    if not improved and self.save_best_only:
        return
    if improved:
        self.best_score = score
    self._save_agent(agent, score)
def on_iteration_end(self, agent: AgentABC):
    """Signal early stopping once the mean test reward reaches the target.

    Returns True (stop training) when the mean test-episode reward for
    this iteration is at least ``target_reward``; optionally announces
    the stop when ``verbose`` is set.
    """
    logs = agent_logger.flush(self.agent_logger_cid)
    reward = np.mean(logs[0]["test_episode_total_reward"])
    should_stop = reward >= self.target_reward
    if should_stop and self.verbose:
        message = (
            "Early stopping in iteration_number %s. "
            "Achieved mean raw reward of %.4f (target was %.4f)"
            % (agent.iteration_count, reward, self.target_reward)
        )
        print(message)
    return should_stop
def on_iteration_end(self, agent: AgentABC):
    """Summarize the iteration's test episodes and log to the enabled sinks.

    Builds a (mean reward, mean episode length) tuple from the flushed
    agent logs and forwards it to the screen and/or file logger.
    """
    logs = agent_logger.flush(self.agent_logger_cid)
    summary = (
        np.mean(logs[0]["test_episode_total_reward"]),
        np.mean(logs[0]["test_episode_length"]),
    )
    if self.on_screen:
        self._log_on_screen(agent.iteration_count, summary)
    if self.to_file:
        self._log_to_file(agent.iteration_count, summary)
def on_iteration_end(self, agent: AgentABC):
    """Flush agent and network logs, summarize them, and dispatch the result.

    The computed summaries are unpacked into the screen and/or file
    logging helpers, depending on which sinks are enabled.
    """
    agent_logs = agent_logger.flush(self.agent_logger_cid)
    nn_logs = nn_logger.flush(self.nn_logger_cid)
    summaries = self._calculate_summaries(agent_logs, nn_logs)
    iteration = agent.iteration_count
    if self.on_screen:
        self._log_on_screen(iteration, *summaries)
    if self.to_file:
        self._log_to_file(iteration, *summaries)