def exercise_agent(gymName, episodes, render=True, convolutional=False, max_steps=10000):
    """Play a QAgent in the named gym environment for a number of episodes.

    Args:
        gymName: environment id passed to ``gym.make``.
        episodes: number of episodes to run.
        render: when True, render every step to the screen.
        convolutional: forwarded to ``QAgent`` (selects the network variant).
        max_steps: per-episode step cap (generalizes the former hard-coded
            10000).

    Returns:
        The longest episode length (in steps) observed across all episodes.
    """
    max_t = 0
    env = gym.make(gymName)
    try:
        agent = QAgent(env.action_space.n, convolutional)
        for i_episode in range(episodes):
            state = normalize(env.reset())
            # Seed the agent with the initial state: zero reward, not done.
            agent.observe(state, 0, False)
            total_reward = 0
            for t in range(max_steps):
                if render:
                    env.render()
                action = agent.act()
                state, reward, done, _info = env.step(action)
                state = normalize(state)
                total_reward += reward
                agent.observe(state, reward, done)
                if done:
                    max_t = max(max_t, t)
                    print(f'{t} : {max_t} : {total_reward}')
                    break
    finally:
        # Release the environment (and any render window) even if the agent
        # or the render call raises mid-episode.
        env.close()
    return max_t
# Replay five episodes with a restored agent acting fully greedily
# (epsilon = 0) and echo the running score as it changes.
env = BananaEnv()
agent = QAgent(action_space=env.get_action_space_size(),
               state_space=env.get_state_space_size())

# Choose which saved network weights to restore.
if load_bad_network:
    agent.load_checkpoint(bad_opt_networks_ckp)
elif load_from_seeded_64:
    # target and delayer weights are not actually needed here; they would be
    # needed to resume training as it was left.
    agent.load_checkpoint(local_checkpoint=seeded_test_64_ckp["local"],
                          target_checkpoint=seeded_test_64_ckp["target"],
                          delayer_checkpoint=seeded_test_64_ckp["delayer"])
else:
    agent.load_checkpoint(top_opt_networks_ckp[sel_network])

env.reset()
done = False
for episode in range(5):
    print("Episode {:d}\n score: ".format(episode), end=" ")
    env.reset()
    total = 0
    while True:
        step = agent.act(env, 0)  # epsilon 0: pure exploitation
        total = total + step.reward
        # Only print when the reward actually moved the score.
        if abs(step.reward) > 0.01:
            print(str(int(total)), sep=' ', end=' ', flush=True)
        done = step.done
        sleep(0.02)  # slow the playback down so it is watchable
        if done:
            break
    print("\nfinal score:" + str(total) + "\n")
    sleep(1)
# Training loop: run fixed-length episodes with multiplicatively decaying
# epsilon, learn on minibatches every few steps, and periodically sync the
# target network and report the windowed mean score.
score_list = []
mean_score_list = []
running_score = 0
eps_start = 1.0
eps_decay = pars["eps_decay_sel"]
eps_end = 0.01
eps = eps_start
max_ep_len = 400
train_episodes = 701

for ep in range(train_episodes):
    # Decay epsilon geometrically, never dropping below the floor.
    eps = max(eps * eps_decay, eps_end)
    env.reset()
    done = False
    curr_score = 0
    for step_i in range(max_ep_len):
        experience = agent.act(env, eps)
        curr_score += experience.reward
        if experience.done:
            break
        # Learn from a 64-sample minibatch every `learn_every` steps.
        if step_i % learn_every == 0:
            agent.learn(64)

    score_list.append(curr_score)
    score_window.append(curr_score)

    # Periodic target-network sync and progress reporting.
    if ep % update_every == 0:
        agent.update_target()
    if ep % 20 == 0:
        print(f"episode {ep}, mean score: {np.mean(score_window)}")
    if ep % 100 == 0:
        mean_score_list.append(np.mean(score_window))

print(f"test completed with scores: {mean_score_list}")
"""Train a Q-agent on FrozenLake, then measure its success rate over 100 episodes."""
import gym
from q_agent import QAgent

env = gym.make('FrozenLake-v0')
print(env.action_space)
print(env.observation_space)

agent = QAgent(env.observation_space, env.action_space)
agent.learn(env)

success = 0
for episode in range(100):
    obs = env.reset()
    done = False
    while not done:
        action = agent.act(obs)
        obs, reward, done, info = env.step(action)
    # FrozenLake yields reward 1.0 only on reaching the goal.
    if reward == 1.0:
        success += 1
print("success rate is {}".format(success))