def test_solve_play(d=6, draw=False,
                    num_episodes=10, episode_length=100):
    """Solve a ``TempSim`` game and print the reward of greedy episodes.

    A Q function is obtained by calling ``value_iteration`` on a fresh
    ``TabularQ`` built from the game's states and actions. Each episode is
    then run through ``sim_episode`` with the policy ``greedy(qf, s)``.

    Args:
        d: Only used to pick the episode length: the full
            ``episode_length`` when ``d > 5``, otherwise half of it.
        draw: Forwarded unchanged to ``sim_episode``.
        num_episodes: Number of episodes to simulate.
        episode_length: Length handed to ``sim_episode`` (halved when
            ``d <= 5``).

    Returns:
        None. The reward of every episode is printed.
    """
    game = TempSim()
    qf = value_iteration(game, TabularQ(game.states, game.actions))

    # Floor division keeps the halved length an integer; true division
    # would hand sim_episode a float (e.g. 50.0) under Python 3.
    # NOTE(review): assumes sim_episode wants a whole step count - confirm.
    steps = episode_length if d > 5 else episode_length // 2

    def policy(s):
        # Act greedily with respect to the solved Q function.
        return greedy(qf, s)

    for _episode in range(num_episodes):
        reward, _ = sim_episode(game, steps, policy, draw=draw)
        print('Reward', reward)
# Example no. 2
# 0
def test_solve_play(d=5, draw=False,
                    num_episodes=10, episode_length=100):
    """Solve a ``No_Exit(d)`` game, play greedy episodes, return the animation.

    A Q function is obtained by calling ``value_iteration`` on a fresh
    ``TabularQ`` built from the game's states and actions. Each episode is
    then run through ``sim_episode`` with the policy ``greedy(qf, s)``.

    Args:
        d: Passed to the ``No_Exit`` constructor. It also picks the episode
            length: the full ``episode_length`` when ``d > 3``, otherwise
            half of it.
        draw: Forwarded unchanged to ``sim_episode``.
        num_episodes: Number of episodes to simulate.
        episode_length: Length handed to ``sim_episode`` (halved when
            ``d <= 3``).

    Returns:
        The third value returned by ``sim_episode`` for the last episode,
        or ``None`` when ``num_episodes`` is 0 (no episode was run).
    """
    game = No_Exit(d)
    qf = value_iteration(game, TabularQ(game.states, game.actions))

    # Floor division keeps the halved length an integer; true division
    # would hand sim_episode a float (e.g. 50.0) under Python 3.
    # NOTE(review): assumes sim_episode wants a whole step count - confirm.
    steps = episode_length if d > 3 else episode_length // 2

    def policy(s):
        # Act greedily with respect to the solved Q function.
        return greedy(qf, s)

    # Bound up front so that zero episodes returns None instead of raising
    # UnboundLocalError at the return statement.
    animation = None
    for _episode in range(num_episodes):
        reward, _, animation = sim_episode(game, steps, policy, draw=draw)
        print('Reward', reward)
    return animation