Example #1
        assert (0.8 <= np.mean(total_rewards) <= 0.95)
    print('Well done!')


if __name__ == '__main__':
    visualize = True
    mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
    mdp.render()

    gamma = 0.9
    num_iter = 100
    min_difference = 1e-5

    # Play in Frozen Lake Env
    # Initialize all state values to zero
    state_values = {state: 0 for state in mdp.get_all_states()}

    # Run the value iteration algorithm (a sketch of rl_value_iteration follows this example)
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference,
                                         state_values)

    # See how our agent performs, i.e. render what happens as the agent picks the greedy (`optimal`) action
    s = mdp.reset()
    mdp.render()
    rewards = []  # Save all rewards to see mean reward.

    for _ in range(num_iter):
        action = get_optimal_action(mdp, state_values, s, gamma)
        s, reward, done, _ = mdp.step(action)  # advance to the next state
        rewards.append(reward)
        if done:
            break

    assert 0.8 <= np.mean(rewards) <= 0.95
    print('Well done!')
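
Example #1 calls rl_value_iteration and get_optimal_action, which are not shown in these snippets. Below is a minimal sketch of the value-iteration step, assuming the MDP wrapper exposes get_possible_actions(s), get_next_states(s, a) (a dict mapping next states to transition probabilities), and get_reward(s, a, s'); the helper names, the returned tuple, and that interface are inferred from how the examples call them, not taken from the actual course code.

def get_action_value(mdp, state_values, state, action, gamma):
    """Q(s, a) = sum over s' of P(s'|s,a) * (r(s,a,s') + gamma * V(s'))."""
    return sum(prob * (mdp.get_reward(state, action, next_state)
                       + gamma * state_values[next_state])
               for next_state, prob in mdp.get_next_states(state, action).items())


def rl_value_iteration(mdp, gamma, num_iter, min_difference, state_values):
    """Repeat Bellman backups until the largest value change drops below min_difference."""
    n_sweeps = 0
    for n_sweeps in range(num_iter):
        new_values = {}
        for state in mdp.get_all_states():
            actions = mdp.get_possible_actions(state)
            if not actions:  # terminal state: its value stays at zero
                new_values[state] = 0.0
                continue
            # Bellman optimality backup: V(s) = max_a Q(s, a)
            new_values[state] = max(
                get_action_value(mdp, state_values, state, a, gamma)
                for a in actions)
        diff = max(abs(new_values[s] - state_values[s])
                   for s in mdp.get_all_states())
        state_values = new_values
        if diff < min_difference:
            break
    return state_values, n_sweeps  # the second return value is a guess (number of sweeps)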

Example #2
if __name__ == '__main__':
    visualize = False
    mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
    mdp.render()

    gamma = 0.9
    num_iter = 100
    min_difference = 1e-5

    # Play in Frozen Lake Env
    all_states = mdp.get_all_states()

    # Run value iteration, starting from all state values at zero
    init_values = {s: 0 for s in all_states}
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference,
                                         init_values)

    # See how our agent performs, i.e. render what happens as the agent picks the greedy (`optimal`) action
    s = mdp.reset()
    mdp.render()
    rewards = []  # Save all rewards to see mean reward.

    # Roll out one greedy episode (a sketch of get_optimal_action follows this example)
    for t in range(num_iter):
        action = get_optimal_action(mdp, state_values, s, gamma)
        s, r, done, _ = mdp.step(action)
        rewards.append(r)
        if done:
            break
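
The rollout above relies on get_optimal_action, which is also not defined in these snippets. A plausible sketch, reusing get_action_value from the previous sketch and the same assumed MDP interface:

def get_optimal_action(mdp, state_values, state, gamma):
    """Return the action that is greedy with respect to the current state values."""
    actions = mdp.get_possible_actions(state)
    if not actions:  # terminal state: no action to take
        return None
    return max(actions,
               key=lambda a: get_action_value(mdp, state_values, state, a, gamma))
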
Example #3

if __name__ == '__main__':
    visualize = True
    mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
    mdp.render()

    gamma = 0.9
    num_iter = 100
    min_difference = 1e-5

    # Play in Frozen Lake Env
    # Initialize all state values to zero
    state_values = {s: 0 for s in mdp.get_all_states()}
    # Run value iteration algo!
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference,
                                         state_values)

    if visualize:
        draw_policy(mdp,
                    state_values,
                    filename='frozen_lake_visualization.png')

    # Let's see how the value estimates improve over time.
    visualize_step_by_step(mdp, gamma, num_iter, min_difference)

    # Express test: play many games and check the mean reward
    # (a sketch of mass_gaming follows this example).
    rewards = mass_gaming(mdp, gamma, num_iter, 1000, 100)
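
mass_gaming and visualize_step_by_step are not shown in these snippets either. Judging by the calls above, mass_gaming presumably plays many episodes with the greedy policy and returns their rewards, while visualize_step_by_step presumably redraws the policy after each value-iteration sweep (Example #4 spells out that loop explicitly). Below is a minimal sketch of mass_gaming under those assumptions, reusing the earlier sketches; reading the last two arguments as the number of games and the per-game step limit, and re-running value iteration inside, are both guesses.

def mass_gaming(mdp, gamma, num_iter, n_games=1000, max_steps=100):
    """Play n_games greedy episodes and return each episode's total reward."""
    state_values = {s: 0 for s in mdp.get_all_states()}
    # 1e-5 is an arbitrary convergence threshold for this sketch
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, 1e-5, state_values)
    total_rewards = []
    for _ in range(n_games):
        s = mdp.reset()
        episode_reward = 0.0
        for _ in range(max_steps):
            action = get_optimal_action(mdp, state_values, s, gamma)
            s, r, done, _ = mdp.step(action)
            episode_reward += r
            if done:
                break
        total_rewards.append(episode_reward)
    return total_rewards
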
Example #4
# Same environment and discount factor as in the earlier examples
mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
gamma = 0.9

state_values = value_iteration(mdp)

s = mdp.reset()
mdp.render()
for t in range(100):
    a = get_optimal_action(mdp, state_values, s, gamma)
    print(a, end='\n\n')
    s, r, done, _ = mdp.step(a)
    mdp.render()
    if done:
        break

# Visualize the greedy policy with draw_policy(mdp, state_values);
# a rough text-based stand-in is sketched after the loop below.

state_values = {s: 0 for s in mdp.get_all_states()}

for i in range(10):
    print("after iteration %i" % i)
    state_values = value_iteration(mdp, state_values, num_iter=1)
    draw_policy(mdp, state_values)
# please ignore iter 0 at each step
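
draw_policy is a plotting helper that is not included in these snippets. As a rough, purely illustrative stand-in (not the actual helper), the greedy policy can be printed state by state, reusing get_optimal_action from the earlier sketch:

def print_policy(mdp, state_values, gamma=0.9):
    """Crude text stand-in for draw_policy: print the greedy action for every state."""
    for state in mdp.get_all_states():
        action = get_optimal_action(mdp, state_values, state, gamma)
        print(state, '->', action if action is not None else 'terminal')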

from IPython.display import clear_output
from time import sleep
mdp = FrozenLakeEnv(map_name='8x8', slip_chance=0.1)
state_values = {s: 0 for s in mdp.get_all_states()}

for i in range(30):
    clear_output(True)
    print("after iteration %i" % i)