def test_solve_play(d=6, draw=False, num_episodes=10, episode_length=100):
    """Run ``value_iteration`` on a ``TempSim`` game and simulate greedy episodes.

    A ``TabularQ`` built from ``game.states`` and ``game.actions`` is passed to
    ``value_iteration``; the result drives a policy that calls ``greedy`` on
    it. One ``'Reward'`` line is printed per simulated episode.

    NOTE(review): another ``test_solve_play`` is defined later in this file;
    if both live at module level the later one rebinds the name -- confirm
    which variant is meant to be callable.

    Args:
        d: Only selects the episode length here: the full ``episode_length``
            when ``d > 5``, otherwise half of it. It is not passed to
            ``TempSim``.
        draw: Forwarded to ``sim_episode`` as its ``draw`` keyword.
        num_episodes: Number of ``sim_episode`` calls to make.
        episode_length: Length handed to ``sim_episode`` (halved when
            ``d <= 5``).
    """
    game = TempSim()
    qf = value_iteration(game, TabularQ(game.states, game.actions))

    def policy(s):
        # Choose the action for state `s` by delegating to `greedy` on `qf`.
        return greedy(qf, s)

    # NOTE(review): `/` is true division, so the halved length is a float
    # (e.g. 50.0) -- confirm `sim_episode` accepts a non-int length.
    steps = episode_length if d > 5 else episode_length / 2

    for _episode in range(num_episodes):
        outcome = sim_episode(game, steps, policy, draw=draw)
        reward, _ = outcome
        print('Reward', reward)
def test_solve_play(d=5, draw=False, num_episodes=10, episode_length=100):
    """Run ``value_iteration`` on a ``No_Exit(d)`` game and simulate greedy episodes.

    A ``TabularQ`` built from ``game.states`` and ``game.actions`` is passed to
    ``value_iteration``; the result drives a policy that calls ``greedy`` on
    it. One ``'Reward'`` line is printed per simulated episode.

    Args:
        d: Passed to ``No_Exit``. Also selects the episode length: the full
            ``episode_length`` when ``d > 3``, otherwise half of it.
        draw: Forwarded to ``sim_episode`` as its ``draw`` keyword.
        num_episodes: Number of ``sim_episode`` calls to make.
        episode_length: Length handed to ``sim_episode`` (halved when
            ``d <= 3``).

    Returns:
        The third element of the tuple returned by the last ``sim_episode``
        call, or ``None`` when no episode is run (``num_episodes <= 0``).
    """
    game = No_Exit(d)
    qf = value_iteration(game, TabularQ(game.states, game.actions))

    def policy(s):
        # Choose the action for state `s` by delegating to `greedy` on `qf`.
        return greedy(qf, s)

    # Loop-invariant, so computed once up front.
    # NOTE(review): `/` is true division, so the halved length is a float
    # (e.g. 50.0) -- confirm `sim_episode` accepts a non-int length.
    steps = episode_length if d > 3 else episode_length / 2

    # Pre-bind so the final `return` cannot raise UnboundLocalError when the
    # loop body never executes.
    animation = None
    for _episode in range(num_episodes):
        reward, _, animation = sim_episode(game, steps, policy, draw=draw)
        print('Reward', reward)
    return animation