import matplotlib.pyplot as plt
import numpy as np


def visualize_step_by_step(mdp, gamma, max_iter_number, min_difference):
    """Run value iteration one sweep at a time, drawing the greedy policy after each sweep."""
    fig = plt.figure(figsize=(5, 5))
    state_values = {state: 0 for state in mdp.get_all_states()}
    for _ in range(max_iter_number):
        # Do a single Bellman sweep, then redraw the policy implied by the new values.
        new_state_values, done = rl_value_iteration(mdp, gamma, 1, min_difference, state_values)
        if done:
            break
        draw_policy(mdp, new_state_values, fig)
        state_values = new_state_values
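# The helpers rl_value_iteration, get_optimal_action and draw_policy are assumed
# to be defined elsewhere in this project. In case they are not, below is a
# minimal sketch of the first two: classic value iteration plus greedy one-step
# lookahead. It assumes the MDP exposes get_possible_actions(state),
# get_next_states(state, action) -> {next_state: probability} and
# get_reward(state, action, next_state); these interface names are assumptions,
# not taken from this section.

def get_action_value(mdp, state_values, state, action, gamma):
    """Q(s, a) = sum over s' of P(s'|s,a) * [r(s,a,s') + gamma * V(s')]."""
    return sum(
        prob * (mdp.get_reward(state, action, next_state)
                + gamma * state_values[next_state])
        for next_state, prob in mdp.get_next_states(state, action).items()
    )


def rl_value_iteration(mdp, gamma, num_iter, min_difference, state_values):
    """Run up to num_iter Bellman sweeps; stop early once the largest per-state
    update falls below min_difference. Returns (state_values, converged)."""
    for _ in range(num_iter):
        new_state_values = {
            state: max(
                (get_action_value(mdp, state_values, state, a, gamma)
                 for a in mdp.get_possible_actions(state)),
                default=0.0,  # terminal states have no actions and keep value 0
            )
            for state in mdp.get_all_states()
        }
        diff = max(abs(new_state_values[s] - state_values[s])
                   for s in mdp.get_all_states())
        state_values = new_state_values
        if diff < min_difference:
            return state_values, True
    return state_values, False


def get_optimal_action(mdp, state_values, state, gamma):
    """Greedy one-step lookahead: pick the action with the largest Q(s, a)."""
    actions = mdp.get_possible_actions(state)
    if not actions:
        return None  # terminal state
    return max(actions,
               key=lambda a: get_action_value(mdp, state_values, state, a, gamma))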
def mass_gaming(mdp, gamma, num_iter, games_number, steps_number, min_difference=1e-5):
    """Train once with value iteration, then estimate the average reward over many games."""
    state_values = {state: 0 for state in mdp.get_all_states()}
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference, state_values)
    total_rewards = []
    for game_i in range(games_number):
        s = mdp.reset()
        rewards = []
        for t in range(steps_number):
            s, r, done, _ = mdp.step(get_optimal_action(mdp, state_values, s, gamma))
            rewards.append(r)
            if done:
                break
        total_rewards.append(np.sum(rewards))
    print('Average reward: ', np.mean(total_rewards))
    if mdp.slip_chance == 0:
        # Deterministic lake: the optimal policy should reach the goal every game.
        assert np.mean(total_rewards) == 1.0
    else:
        # Slippery lake: expect a success rate in the 0.8-0.95 band.
        assert 0.8 <= np.mean(total_rewards) <= 0.95
    print('Well done!')
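# A hedged usage sketch for mass_gaming, kept as a comment so nothing runs on
# import. With slip_chance=0 the lake is deterministic and the optimal policy
# should score exactly 1.0 per game; with slip_chance=0.1 the assert above
# expects a success rate between 0.8 and 0.95. map_name='4x4' is an assumption
# here; __main__ below uses the '8x8' map.
#
#   mass_gaming(FrozenLakeEnv(map_name='4x4', slip_chance=0),
#               gamma=0.9, num_iter=100, games_number=1000, steps_number=100)
#   mass_gaming(FrozenLakeEnv(map_name='4x4', slip_chance=0.1),
#               gamma=0.9, num_iter=100, games_number=1000, steps_number=100)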
if __name__ == '__main__':
    visualize = True
    mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
    mdp.render()

    gamma = 0.9
    num_iter = 100
    min_difference = 1e-5

    # Play in the Frozen Lake env.
    state_values = {state: 0 for state in mdp.get_all_states()}  # Initialize state values.

    # Run the value iteration algorithm!
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference, state_values)

    if visualize:
        # Watch the greedy policy evolve sweep by sweep.
        visualize_step_by_step(mdp, gamma, num_iter, min_difference)

    # See how our agent performs, i.e. render what happens when the agent
    # follows the (approximately) optimal policy.
    s = mdp.reset()
    mdp.render()
    rewards = []  # Save all rewards to report the total at the end.
    for _ in range(num_iter):
        action = get_optimal_action(mdp, state_values, s, gamma)
        new_state, reward, done, _ = mdp.step(action)
        rewards.append(reward)
        s = new_state
        mdp.render()
        if done:
            break
    print('Total reward:', np.sum(rewards))
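# Note on the rollout above: get_optimal_action performs greedy one-step
# lookahead over the converged state values, and num_iter doubles as the
# episode step cap. On FrozenLake the episode reward is 1 on reaching the goal
# and 0 otherwise, so averaging over many rollouts (see mass_gaming) estimates
# the success rate of the learned policy.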