Esempio n. 1
0
            # RL take action and get next state and reward
            _, next_state_index, reward, done = env.step(action)

            # RL choose action based on next state
            next_action = RL.choose_action(str(next_state_index))

            # RL learn from this transition (s, a, r, s, a) ==> Sarsa
            RL.learn(str(state), action, reward, str(next_state_index), next_action)

            # swap state and action
            state = next_state_index
            action = next_action

            # break while loop when end of this episode
            if done:
                break

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = GridWorld()
    RL = Sarsa(actions=list(range(env.n_actions)))

    env.after(10000, update)
    env.mainloop()
    print(RL.q_table)