assert 0.8 <= np.mean(total_rewards) <= 0.95
print('Well done!')

if __name__ == '__main__':
    visualize = True
    mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
    mdp.render()
    gamma = 0.9
    num_iter = 100
    min_difference = 1e-5

    # Play in the Frozen Lake env
    state_values = {state: 0 for state in mdp.get_all_states()}  # initialize state values

    # Run the value iteration algorithm
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference, state_values)

    # See how our agent performs, i.e. render what happens when it follows the "optimal" actions
    s = mdp.reset()
    mdp.render()
    rewards = []  # save all rewards to compute the mean reward
    for _ in range(num_iter):
        action = get_optimal_action(mdp, state_values, s, gamma)
        new_state, reward, done, _ = mdp.step(action)
        rewards.append(reward)
        s = new_state  # fix: advance to the new state so the policy is queried for the current state
        if done:
            break
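# The helpers rl_value_iteration and get_optimal_action are not shown in this snippet.
# Below is a minimal, hypothetical sketch of what they might look like. It assumes the MDP
# exposes get_all_states / get_possible_actions / get_next_states / get_reward / is_terminal
# as in the Practical RL FrozenLakeEnv wrapper; the actual course code may differ.

def get_action_value(mdp, state_values, state, action, gamma):
    # Q(s, a) = sum over s' of P(s' | s, a) * [r(s, a, s') + gamma * V(s')]
    return sum(
        prob * (mdp.get_reward(state, action, next_state) + gamma * state_values[next_state])
        for next_state, prob in mdp.get_next_states(state, action).items()
    )


def get_optimal_action(mdp, state_values, state, gamma):
    # Greedy action with respect to the current state-value estimates.
    if mdp.is_terminal(state):
        return None
    return max(
        mdp.get_possible_actions(state),
        key=lambda a: get_action_value(mdp, state_values, state, a, gamma),
    )


def rl_value_iteration(mdp, gamma, num_iter, min_difference, state_values):
    # Repeated Bellman backups V(s) <- max_a Q(s, a), stopping once the largest
    # change across all states drops below min_difference.
    diff = float('inf')
    for _ in range(num_iter):
        new_values = {}
        for s in mdp.get_all_states():
            if mdp.is_terminal(s):
                new_values[s] = 0
            else:
                new_values[s] = max(
                    get_action_value(mdp, state_values, s, a, gamma)
                    for a in mdp.get_possible_actions(s)
                )
        diff = max(abs(new_values[s] - state_values[s]) for s in mdp.get_all_states())
        state_values = new_values
        if diff < min_difference:
            break
    return state_values, diff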
else:
    assert 0.8 <= np.mean(total_rewards) <= 0.95
    print('Well done!')

if __name__ == '__main__':
    visualize = False
    mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
    mdp.render()
    gamma = 0.9
    num_iter = 100
    min_difference = 1e-5

    # Play in the Frozen Lake env
    all_states = mdp.get_all_states()

    # Run the value iteration algorithm
    init_values = {s: 0 for s in all_states}  # initialize state values
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference, init_values)

    # See how our agent performs, i.e. render what happens when it follows the "optimal" actions
    s = mdp.reset()
    mdp.render()
    rewards = []  # save all rewards to compute the mean reward
    for t in range(num_iter):
        s, r, done, _ = mdp.step(get_optimal_action(mdp, state_values, s, gamma))
        rewards.append(r)
        if done:
            break
if __name__ == '__main__':
    visualize = True
    mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
    mdp.render()
    gamma = 0.9
    num_iter = 100
    min_difference = 1e-5

    # Play in the Frozen Lake env
    state_values = {s: 0 for s in mdp.get_all_states()}  # initialize state values

    # Run the value iteration algorithm
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference, state_values)

    if visualize:
        draw_policy(mdp, state_values, filename='frozen_lake_visualization.png')
        # Let's see how the policy improves over time.
        visualize_step_by_step(mdp, gamma, num_iter, min_difference)

    # Express test: play many games and save all rewards to check the mean reward.
    rewards = mass_gaming(mdp, gamma, num_iter, 1000, 100)
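# mass_gaming is not defined in this snippet either. A hypothetical sketch of such an
# "express test" is below: it plays many greedy episodes and returns the per-episode
# rewards, so the assert on np.mean(total_rewards) can be applied to its output.
# The argument order (number of games, then episode length) is an assumption and may
# not match the real helper; the 1e-5 convergence threshold is also assumed.

def mass_gaming(mdp, gamma, num_iter, n_games, t_max):
    # Solve the MDP once, then evaluate the greedy policy over many episodes.
    state_values = {s: 0 for s in mdp.get_all_states()}
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, 1e-5, state_values)
    total_rewards = []
    for _ in range(n_games):
        s = mdp.reset()
        episode_reward = 0.0
        for _ in range(t_max):
            s, r, done, _ = mdp.step(get_optimal_action(mdp, state_values, s, gamma))
            episode_reward += r
            if done:
                break
        total_rewards.append(episode_reward)
    return total_rewards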
state_values = value_iteration(mdp)

s = mdp.reset()
mdp.render()
for t in range(100):
    a = get_optimal_action(mdp, state_values, s, gamma)
    print(a, end='\n\n')
    s, r, done, _ = mdp.step(a)
    mdp.render()
    if done:
        break

# Use draw_policy(mdp, state_values) to watch the greedy policy evolve:
state_values = {s: 0 for s in mdp.get_all_states()}
for i in range(10):
    print("after iteration %i" % i)
    state_values = value_iteration(mdp, state_values, num_iter=1)
    draw_policy(mdp, state_values)
# please ignore iter 0 at each step

from IPython.display import clear_output
from time import sleep

mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
state_values = {s: 0 for s in mdp.get_all_states()}
for i in range(30):
    clear_output(True)
    print("after iteration %i" % i)
    # assumed continuation (the snippet is cut off here): one value-iteration sweep
    # per frame, mirroring the loop above, with a short pause between redraws
    state_values = value_iteration(mdp, state_values, num_iter=1)
    draw_policy(mdp, state_values)
    sleep(0.5)
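# value_iteration as called above has a slightly different signature from rl_value_iteration
# (the initial values and number of sweeps are optional). A hypothetical wrapper reconciling
# the two, with assumed defaults for gamma, num_iter and min_difference:

def value_iteration(mdp, state_values=None, gamma=0.9, num_iter=1000, min_difference=1e-5):
    # Start from all-zero values unless a warm start is supplied, then run the sweeps.
    if state_values is None:
        state_values = {s: 0 for s in mdp.get_all_states()}
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference, state_values)
    return state_values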