def test_learn_play(d = 6, num_layers = 2, num_units = 100,
                    eps = 0.5, iters = 10000, draw=False,
                    tabular = True, batch=False, batch_epochs=10,
                    num_episodes = 10, episode_length = 100):
    """Train a Q function on a ``No_Exit(d)`` game, report scores, then play.

    Args:
        d: Argument passed to ``No_Exit``. It also selects the playback
            length: the full ``episode_length`` when ``d > 5``, otherwise
            half of it.
        num_layers: Passed to ``NNQ`` (only used when ``tabular`` is false).
        num_units: Passed to ``NNQ`` (only used when ``tabular`` is false).
        eps: Forwarded to the learner as its ``eps`` keyword
            (presumably the exploration rate -- confirm against the learner).
        iters: Forwarded to the learner as ``iters``. A score is recorded
            every ``iters / 10`` iterations (every iteration if
            ``iters <= 10``).
        draw: Forwarded to ``sim_episode`` during playback.
        tabular: Use ``TabularQ`` when true, ``NNQ`` otherwise.
        batch: Train with ``Q_learn_batch`` when true, ``Q_learn`` otherwise.
        batch_epochs: ``epochs`` given to ``NNQ`` when ``batch`` is true
            (``1`` is used otherwise).
        num_episodes: Number of episodes given to ``evaluate`` and the
            number of playback episodes run after training.
        episode_length: Episode length given to ``evaluate`` and used to
            derive the playback length.

    Returns:
        The third value returned by ``sim_episode`` for the last playback
        episode, or ``None`` when no playback episode was run.
    """
    iters_per_value = 1 if iters <= 10 else int(iters / 10.0)
    scores = []

    def interact(q, iter=0):
        # Progress callback handed to the learner: every `iters_per_value`
        # iterations, score the greedy policy for the current q and log it.
        if iter % iters_per_value == 0:
            scores.append((iter, evaluate(game, num_episodes, episode_length,
                                          lambda s: greedy(q, s))[0]))
            print('score', scores[-1], flush=True)

    game = No_Exit(d)
    if tabular:
        q = TabularQ(game.states, game.actions)
    else:
        q = NNQ(game.states, game.actions, game.state2vec,
                num_layers, num_units,
                epochs=batch_epochs if batch else 1)

    # `eps` is forwarded so the caller's value is actually honoured instead
    # of being silently replaced by the learner's own default.
    if batch:
        qf = Q_learn_batch(game, q, iters=iters, episode_length = 100,
                           n_episodes=10, eps=eps, interactive_fn=interact)
    else:
        qf = Q_learn(game, q, iters=iters, eps=eps, interactive_fn=interact)

    if scores:
        # Protocol 0 is the ASCII pickle format, so the bytes decode to str.
        print('String to upload (incude quotes): "%s"'%toHex(pickle.dumps([tabular, batch, scores], 0).decode()))
        # Plot learning curve
        plot_points(np.array([s[0] for s in scores]),
                    np.array([s[1] for s in scores]))

    # Pre-bind so the return below is well defined when num_episodes <= 0.
    animation = None
    # NOTE(review): `/` yields a float when d <= 5; confirm sim_episode
    # accepts a non-integer length.
    play_length = episode_length if d > 5 else episode_length/2
    for _episode in range(num_episodes):
        reward, _, animation = sim_episode(game, play_length,
                                           lambda s: greedy(qf, s), draw=draw)
        print('Reward', reward)
    return animation
def test_learn_play(game=None, q=None, num_layers=2, num_units=100,
                    eps=0.5, iters=10000, draw=False,
                    tabular=True, batch=False, batch_epochs=10,
                    num_episodes=2, episode_length=500):
    """Train a Q function on a game, then run ``emulate`` with it.

    Args:
        game: Environment to train on. A fresh ``TempSim()`` is created
            when this is ``None``.
        q: Q-function object to train. When ``None`` a new one is built
            (``TabularQ`` if ``tabular`` else ``NNQ``) and the module-level
            ``r_stvar`` is set to ``round`` or ``float`` respectively.
            ``r_stvar`` is left untouched when ``q`` is supplied.
        num_layers: Passed to ``NNQ`` when one is built here.
        num_units: Passed to ``NNQ`` when one is built here.
        eps: Forwarded to the learner as its ``eps`` keyword.
        iters: Forwarded to the learner as ``iters``. A score is recorded
            every ``iters / 10`` iterations (every iteration if
            ``iters <= 10``).
        draw: Accepted for interface compatibility; not used in this body.
        tabular: Selects ``TabularQ`` vs ``NNQ`` when ``q`` is built here.
        batch: Train with ``Q_learn_batch`` when true, ``Q_learn`` otherwise.
        batch_epochs: ``epochs`` given to ``NNQ`` when ``batch`` is true
            (``1`` is used otherwise).
        num_episodes: Number of episodes given to ``evaluate`` when scoring.
        episode_length: Episode length given to ``evaluate`` and ``emulate``.

    Returns:
        The ``(game, q)`` pair that was used.
    """
    global r_stvar

    iters_per_value = 1 if iters <= 10 else int(iters / 10.0)
    scores = []

    def interact(q, iter=0):
        # Progress callback handed to the learner: every `iters_per_value`
        # iterations, score the greedy policy for the current q and log it.
        if iter % iters_per_value == 0:
            scores.append((iter, evaluate(game, num_episodes, episode_length,
                                          lambda s: greedy(q, s))[0]))
            print('score', scores[-1])

    # Compare against None explicitly: a caller-supplied object that happens
    # to be falsy (e.g. defines __len__/__bool__) must not be replaced.
    if game is None:
        game = TempSim()

    if q is None:
        if tabular:
            r_stvar = round
            q = TabularQ(game.states, game.actions)
        else:
            r_stvar = float
            q = NNQ(game.states, game.actions, game.state2vec,
                    num_layers, num_units,
                    epochs=batch_epochs if batch else 1)

    try:
        # The learner's return value is not needed: `q` itself is what gets
        # passed to emulate() and returned below.
        if batch:
            Q_learn_batch(game, q, iters=iters, episode_length=100,
                          n_episodes=10, eps=eps, interactive_fn=interact)
        else:
            Q_learn(game, q, iters=iters, eps=eps, interactive_fn=interact)
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C cuts training short and execution continues
        # to emulate() with q as it stands.
        pass

    emulate(game, q, episode_length=episode_length)
    return game, q