def evaluate(env, config, q_table, episode, render=False, output=True):
    """
    Evaluate configuration of SARSA on given environment initialised with given Q-table

    :param env (gym.Env): environment to execute evaluation on
    :param config (Dict[str, float]): configuration dictionary containing hyperparameters
    :param q_table (Dict[(Obs, Act), float]): Q-table mapping observation-action to Q-values
    :param episode (int): episodes of training completed
    :param render (bool): flag whether evaluation runs should be rendered
    :param output (bool): flag whether mean evaluation performance should be printed
    :return (float, float): mean and standard deviation of reward received over episodes
    """
    # Greedy evaluation agent: epsilon=0.0 disables exploration so the run
    # measures the learned policy rather than exploratory behaviour.
    eval_agent = SARSA(
        num_acts=env.action_space.n,
        gamma=config["gamma"],
        epsilon=0.0,
        alpha=config["alpha"],
    )
    eval_agent.q_table = q_table

    episodic_rewards = []
    for eps_num in range(config["eval_episodes"]):
        obs = env.reset()
        if render:
            env.render()
            sleep(1)
        episodic_reward = 0
        done = False
        steps = 0
        # Episode ends on `done` or when the step budget is exhausted.
        while not done and steps <= config["max_episode_steps"]:
            steps += 1
            act = eval_agent.act(obs)
            n_obs, reward, done, info = env.step(act)
            if render:
                env.render()
                sleep(1)
            episodic_reward += reward
            obs = n_obs
        episodic_rewards.append(episodic_reward)

    mean_reward = np.mean(episodic_rewards)
    std_reward = np.std(episodic_rewards)

    if output:
        # FIX: read total_eps from the `config` parameter instead of the
        # module-level CONFIG global, so the function works with whatever
        # configuration dictionary the caller supplies.
        print(
            f"EVALUATION ({episode}/{config['total_eps']}): MEAN REWARD OF {mean_reward}"
        )
        if mean_reward >= 0.9:
            print("EVALUATION: SOLVED")
        else:
            print("EVALUATION: NOT SOLVED!")
    return mean_reward, std_reward
def evaluate(env, config, q_table, render=False):
    """
    Evaluate a SARSA configuration on the given environment using a fixed Q-table.

    :param env (gym.Env): environment to execute evaluation on
    :param config (Dict[str, float]): configuration dictionary containing hyperparameters
    :param q_table (Dict[(Obs, Act), float]): Q-table mapping observation-action to Q-values
    :param render (bool): flag whether evaluation runs should be rendered
    :return (float, float, int): mean and standard deviation of return received over
        episodes, and the number of negative returns
    """
    # Act greedily (epsilon = 0) so the evaluation reflects the learned policy.
    agent = SARSA(
        num_acts=env.action_space.n,
        gamma=config["gamma"],
        epsilon=0.0,
        alpha=config["alpha"],
    )
    agent.q_table = q_table

    step_limit = config["max_episode_steps"]
    returns = []
    for _ in range(config["eval_episodes"]):
        obs = env.reset()
        if render:
            env.render()
            sleep(1)

        total = 0
        done = False
        step = 0
        # Run until the environment signals termination or the step budget runs out.
        while not done and step <= step_limit:
            step += 1
            action = agent.act(obs)
            next_obs, reward, done, _info = env.step(action)
            if render:
                env.render()
                sleep(1)
            total += reward
            obs = next_obs
        returns.append(total)

    mean_return = np.mean(returns)
    std_return = np.std(returns)
    negative_returns = sum(1 for r in returns if r < 0)
    return mean_return, std_return, negative_returns