Example 1
"""
# MACROS
LEFT = 0
DOWN = 1
RIGHT = 2
UP = 3

# Key mapping
arrow_keys = {'\x1b[A': UP, '\x1b[B': DOWN, '\x1b[C': RIGHT, '\x1b[D': LEFT}

# FrozenLake-v0 defaults to is_slippery=True
env = gym.make('FrozenLake-v0')

env.reset()

print_utils.clear_screen()
env.render()  # Show the initial board

while True:
    # Choose an action from keyboard
    key = readchar.readkey()

    if key not in arrow_keys.keys():
        print("Game aborted!")
        break

    action = arrow_keys[key]
    state, reward, done, info = env.step(action)

    # Show the board after the action
    print_utils.clear_screen()
    env.render()

    if done:
        print("Episode finished! Reward: {}".format(reward))
        break

env.close()
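print_utils is a local helper module that is not shown in the listings. A minimal sketch that would satisfy both examples, assuming it only needs to clear the terminal and print a win/lose banner (the function names come from the calls in the listings; the bodies are guesses):

# print_utils.py -- hypothetical sketch of the helper module used above
import os


def clear_screen():
    """Clear the terminal on both POSIX and Windows."""
    os.system('cls' if os.name == 'nt' else 'clear')


def print_result(episode_reward):
    """FrozenLake only gives reward 1.0 for reaching the goal."""
    print("You won!" if episode_reward > 0 else "You lost.")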
Example 2
import time

import gym
import numpy as np

import print_utils  # local helper module, sketched above

# FrozenLake-v0 is a 4x4 grid (16 states) with 4 actions. The learning
# parameters were elided from the original listing; the values below are
# illustrative assumptions, not the author's.
N_STATES = 16
N_ACTIONS = 4
N_EPISODES = 2000
LEARNING_RATE = 0.8
DISCOUNT_RATE = 0.95


def main():
    """Train a tabular Q-learning agent on FrozenLake-v0, then replay the learned policy."""
    frozone_lake_env = gym.make("FrozenLake-v0")

    # Initialize table with all zeros
    Q = np.zeros([N_STATES, N_ACTIONS])

    # Learning parameters (LEARNING_RATE, DISCOUNT_RATE) are set at module level
    # Collect the total reward of each training episode
    rewards = []

    for i in range(N_EPISODES):
        # Reset environment and get first new observation
        state = frozone_lake_env.reset()
        episode_reward = 0
        done = False

        # The Q-Table learning algorithm
        while not done:
            # Choose an action greedily from the Q table, plus Gaussian
            # noise that decays over episodes to encourage early exploration
            action = np.argmax(
                Q[state, :] + np.random.randn(1, N_ACTIONS) / (i + 1))

            # Get new state and reward from environment
            new_state, reward, done, _ = frozone_lake_env.step(action)

            # Update the Q-table: blend the old value with the Bellman target,
            # Q(s,a) <- (1 - lr) * Q(s,a) + lr * (r + gamma * max_a' Q(s',a'))
            Q[state, action] = (1 - LEARNING_RATE) * Q[state, action] \
                + LEARNING_RATE * (reward + DISCOUNT_RATE * np.max(Q[new_state, :]))

            episode_reward += reward
            state = new_state

        rewards.append(episode_reward)

    print("Score over time: " + str(sum(rewards) / N_EPISODES))
    print("Final Q-Table Values")

    for i in range(10):
        # Reset environment and get first new observation
        state = frozone_lake_env.reset()
        episode_reward = 0
        done = False

        # Replay the learned policy
        while not done:
            # Always take the greedy action from the learned Q table (no noise)
            action = np.argmax(Q[state, :])

            # Get new state and reward from environment
            new_state, reward, done, _ = frozone_lake_env.step(action)
            print_utils.clear_screen()
            frozone_lake_env.render()
            time.sleep(.1)

            # Keep refining the Q-table even during the demonstration runs
            Q[state, action] = (1 - LEARNING_RATE) * Q[state, action] \
                + LEARNING_RATE * (reward + DISCOUNT_RATE * np.max(Q[new_state, :]))

            episode_reward += reward
            state = new_state

            if done:
                print("Episode Reward: {}".format(episode_reward))
                print_utils.print_result(episode_reward)

        rewards.append(episode_reward)

    frozone_lake_env.close()


if __name__ == "__main__":
    main()
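Both update lines above implement the standard tabular Q-learning rule, with LEARNING_RATE as the step size \alpha and DISCOUNT_RATE as the discount factor \gamma:

Q(s, a) \leftarrow (1 - \alpha)\,Q(s, a) + \alpha \left( r + \gamma \max_{a'} Q(s', a') \right)

which is the convex-combination form of the more common Q(s, a) \leftarrow Q(s, a) + \alpha \left( r + \gamma \max_{a'} Q(s', a') - Q(s, a) \right).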