Beispiel #1
0
def main():
    """Train a Q-table agent on FrozenLake-v0, then play evaluation episodes.

    The function builds the environment, a ``QTable`` sized from the
    environment's discrete observation/action spaces and an ``Epsilon``
    schedule, hands them to ``train_qtable`` and prints the average of the
    rewards it returns.  It then plays 1000 further episodes using
    ``q.select_action`` and prints the average reward per episode.

    The environment is always closed, even if training or evaluation raises.
    """
    # step 1: loading the environment
    env = gym.make("FrozenLake-v0")

    try:
        # step 2: creating the Q-table (one row per state, one column per action
        # -- presumably; the layout is decided by QTable itself)
        state_size = env.observation_space.n
        action_size = env.action_space.n
        q = QTable(state_size, action_size)

        # step 3: creating the epsilon decay
        e = Epsilon(initial_epsilon=1.0, max_epsilon=1.0, min_epsilon=0.01, decay_rate=0.005)

        # step 4: Q-table training
        total_episodes = 100000
        max_steps = 100
        q, rewards = train_qtable(env, q, e, total_episodes, max_steps)

        # NOTE(review): assumes train_qtable returns one reward entry per
        # training episode -- confirm against its implementation.
        print("Score over time {:.4f}".format(sum(rewards) / total_episodes))
        q.print()

        # Play
        eval_episodes = 1000
        report_every = 100
        eval_rewards = []

        for episode in range(eval_episodes):
            # Each episode starts from a fresh reset, so no extra reset is
            # needed before the loop.
            state = env.reset()
            total_rewards = 0
            steps_taken = 0

            # Count from 1 so that ``steps_taken`` is the number of actions
            # actually executed, not the zero-based loop index.
            for steps_taken in range(1, max_steps + 1):
                action = q.select_action(env, state)

                state, reward, done, _ = env.step(action)
                total_rewards += reward

                if done:
                    break

            eval_rewards.append(total_rewards)

            if episode % report_every == 0:
                print("******************************************")
                print("EPISODE {}".format(episode))
                print("Number of steps: {}".format(steps_taken))
                # Shows the state the episode ended in.
                env.render()

        print("Score over time {:.4f}".format(sum(eval_rewards) / eval_episodes))
    finally:
        # Release the environment's resources on every exit path.
        env.close()
def main():
    """Train a Q-table agent on Taxi-v2, then play evaluation episodes.

    The function builds the environment, a ``QTable`` sized from the
    environment's discrete observation/action spaces (learning rate 0.7,
    gamma 0.99) and a default ``Epsilon`` schedule, hands them to
    ``train_qtable`` (verbose mode) and prints the average of the rewards it
    returns.  It then plays 1000 further episodes using ``q.select_action``
    and prints the average reward per episode.

    The environment is always closed, even if training or evaluation raises.
    """
    # Step 1: create the Taxi-v2 environment
    env = gym.make("Taxi-v2")

    try:
        # Step 2: create the QTable
        q = QTable(env.observation_space.n, env.action_space.n, learning_rate=0.7, gamma=0.99)

        # Step 3: create the Epsilon decay (library defaults)
        e = Epsilon()

        # Step 4: Q-table training
        total_episodes = 100000
        max_steps = 100
        q, rewards = train_qtable(env, q, e, total_episodes, max_steps, verbose=True)

        # NOTE(review): assumes train_qtable returns one reward entry per
        # training episode -- confirm against its implementation.
        print("Score over time {:.4f}".format(sum(rewards) / total_episodes))
        q.print()

        # Show the state the environment was left in after training.
        env.render()

        # Play
        eval_episodes = 1000
        report_every = 100
        eval_rewards = []

        for episode in range(eval_episodes):
            # Each episode starts from a fresh reset, so no extra reset is
            # needed before the loop.
            state = env.reset()
            total_rewards = 0
            steps_taken = 0

            # Count from 1 so that ``steps_taken`` is the number of actions
            # actually executed, not the zero-based loop index.
            for steps_taken in range(1, max_steps + 1):
                action = q.select_action(env, state)

                state, reward, done, _ = env.step(action)
                total_rewards += reward

                if done:
                    break

            eval_rewards.append(total_rewards)

            if episode % report_every == 0:
                print("******************************************")
                print("EPISODE {}".format(episode))
                print("Number of steps: {}".format(steps_taken))
                # Shows the state the episode ended in.
                env.render()

        print("Score over time {:.4f}".format(sum(eval_rewards) / eval_episodes))
    finally:
        # Release the environment's resources on every exit path.
        env.close()