Example no. 1
    state_values = {state: 0
                    for state in mdp.get_all_states()}  # Initialize state_values

    # Run value iteration algo!
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference,
                                         state_values)

    # See how our agent performs, i.e. render what is going on while the agent takes the `optimal` action
    s = mdp.reset()
    mdp.render()
    rewards = []  # Save all rewards to see mean reward.

    for _ in range(num_iter):
        action = get_optimal_action(mdp, state_values, s, gamma)
        new_state, reward, done, _ = mdp.step(action)
        rewards += [reward]
        s = new_state
        mdp.render()

        if done:
            print('Done!')
            break

        print(reward)

    print('Average reward: ', np.mean(rewards))

    # if visualize:
    #     draw_policy(mdp, state_values)
    # Play in Frozen Lake Env
    init_values = {s: 0 for s in mdp.get_all_states()}  # Initialize state_values

    # Run value iteration algo!
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference,
                                         init_values)

    # See how our agent performs, i.e. render what is going on while the agent takes the `optimal` action
    s = mdp.reset()
    mdp.render()
    rewards = []  # Save all rewards to see mean reward.

    for t in range(num_iter):
        s, r, done, _ = mdp.step(
            get_optimal_action(mdp, state_values, s, gamma))
        rewards.append(r)
        if done:
            break
    print('Total reward: ', np.sum(rewards))

    print('Average reward: ', np.mean(rewards))

    if visualize:
        draw_policy(mdp, state_values)

    # Let's see how it is improving in time.
    visualize_step_by_step(mdp, gamma, num_iter, min_difference)

    # Express test!
    mass_gaming(mdp, gamma, num_iter, 1000, 100)
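
The examples above call rl_value_iteration and get_optimal_action, whose bodies are not shown in these snippets. Below is a minimal sketch of the value-iteration backup, assuming the MDP exposes get_possible_actions(s), get_next_states(s, a) returning a dict of next_state -> probability, and get_reward(s, a, s_next); these method names and the (values, history) return signature are assumptions, not taken from the examples.

def rl_value_iteration(mdp, gamma, num_iter, min_difference, state_values):
    """Sketch: repeat Bellman backups until values change by less than min_difference."""
    diffs = []
    for _ in range(num_iter):
        new_values = {}
        for s in mdp.get_all_states():
            actions = mdp.get_possible_actions(s)
            if not actions:  # terminal state: no actions, value stays 0
                new_values[s] = 0
                continue
            # Q(s, a) = sum over s' of P(s'|s,a) * (r(s,a,s') + gamma * V(s'))
            q_values = [
                sum(p * (mdp.get_reward(s, a, s_next) + gamma * state_values[s_next])
                    for s_next, p in mdp.get_next_states(s, a).items())
                for a in actions
            ]
            new_values[s] = max(q_values)
        diff = max(abs(new_values[s] - state_values[s]) for s in mdp.get_all_states())
        diffs.append(diff)
        state_values = new_values
        if diff < min_difference:
            break
    return state_values, diffs
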
Example no. 3
    state_values = {s: 0
                    for s in mdp.get_all_states()}  # Initialize state_values

    # Run value iteration algo!
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference,
                                         state_values)  # try

    # See how our agent performs, i.e. render what is going on while the agent takes the `optimal` action
    s = mdp.reset()
    mdp.render()
    rewards = []  # Save all rewards to see mean reward.

    max_steps = 10000
    for _ in range(max_steps):
        act = get_optimal_action(mdp, state_values, s, gamma)
        if act is None:  # no actions available, e.g. a terminal state
            break
        s, r, done, _ = mdp.step(act)
        rewards.append(r)
        if done:
            break

    print('Average reward: ', np.mean(rewards))

    if visualize:
        draw_policy(mdp, state_values)

    # Let's see how it is improving in time.
    visualize_step_by_step(mdp, gamma, num_iter, min_difference)

    # Express test!
    mass_gaming(mdp, gamma, num_iter, 1000, 100)
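
Example no. 3 checks whether get_optimal_action returns None, which suggests the helper yields no action in terminal states. A minimal sketch of such a greedy one-step-lookahead policy, under the same assumed MDP interface as the sketch above:

def get_optimal_action(mdp, state_values, state, gamma):
    """Sketch: pick the action with the highest one-step lookahead Q-value;
    return None when the state has no available actions (e.g. terminal)."""
    actions = mdp.get_possible_actions(state)
    if not actions:
        return None

    def q_value(action):
        return sum(p * (mdp.get_reward(state, action, s_next) + gamma * state_values[s_next])
                   for s_next, p in mdp.get_next_states(state, action).items())

    return max(actions, key=q_value)
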
Example no. 4
    # Play in Frozen Lake Env
    state_values = {s: 0
                    for s in mdp.get_all_states()}  # Initialize state_values

    # Run value iteration algo!
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, min_difference,
                                         state_values)

    # See how our agent performs, i.e. render what is going on while the agent takes the `optimal` action
    s = mdp.reset()
    mdp.render()
    rewards = []  # Save all rewards to see mean reward.

    for step in range(1000):
        s, current_reward, is_done, _ = mdp.step(
            get_optimal_action(mdp, state_values, s, gamma))
        rewards.append(current_reward)
        if is_done:
            break

    print('Average reward: ', np.mean(rewards))

    if visualize:
        draw_policy(mdp, state_values)

    # Let's see how it is improving in time.
    #visualize_step_by_step(mdp, gamma, num_iter, min_difference)

    # Express test!
    mass_gaming(mdp, gamma, num_iter, 1000, 100)
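
Every example ends with mass_gaming(mdp, gamma, num_iter, 1000, 100), whose body is not shown and whose last two arguments are not documented in these snippets. A hedged sketch of one plausible reading, playing many greedy episodes and reporting the mean total reward; the parameter names n_games and max_steps and the hard-coded min_difference are assumptions:

import numpy as np

def mass_gaming(mdp, gamma, num_iter, n_games=1000, max_steps=100):
    """Sketch: evaluate the value-iteration policy over many episodes.
    Only the call mass_gaming(mdp, gamma, num_iter, 1000, 100) appears above;
    argument semantics are assumptions."""
    state_values = {s: 0 for s in mdp.get_all_states()}
    state_values, _ = rl_value_iteration(mdp, gamma, num_iter, 1e-5, state_values)  # 1e-5 is an assumed tolerance

    total_rewards = []
    for _ in range(n_games):
        s = mdp.reset()
        episode_reward = 0.0
        for _ in range(max_steps):
            action = get_optimal_action(mdp, state_values, s, gamma)
            if action is None:
                break
            s, r, done, _ = mdp.step(action)
            episode_reward += r
            if done:
                break
        total_rewards.append(episode_reward)

    print('Average total reward over', n_games, 'games:', np.mean(total_rewards))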