def train_dqn(episodes, env, render_frequency=0):
    now = datetime.datetime.now()
    run_id = f'{now.hour}{now.minute}'
    episode_rewards = []
    agent = DQN(env, params)
    best_score = 0

    for episode in range(episodes):
        rendering = (render_frequency
                     and episode % render_frequency == 0
                     and isinstance(env, HeadlessSnake))
        state = env.reset()  # Reset the environment before each episode to start fresh
        if rendering:
            renderer = Renderer(env, episode + 1)
        env.update_episode(episode + 1)
        # state = np.reshape(state, (1, env.state_space))
        total_reward = 0
        max_steps = 10000
        for step in range(max_steps):
            # 1. Pick the next action using the epsilon-greedy exploration strategy
            action = agent.get_action(state)
            # 2. Perform the action in the environment
            next_state, reward, done, _ = env.step(action)
            total_reward += reward
            # next_state = np.reshape(next_state, (1, env.state_space))
            if rendering:
                renderer.update()
            # 3. Update the Q-function (train the model)
            agent.remember(state, action, reward, next_state, done)
            agent.train_with_experience_replay()
            # 4. Adjust the exploration vs. exploitation probability
            agent.update_exploration_strategy(episode)
            state = next_state
            if done:
                print(f'episode: {episode + 1}/{episodes}, score: {total_reward}, steps: {step}, '
                      f'epsilon: {agent.epsilon}, highscore: {env.maximum}')
                break
        if rendering:
            renderer.bye()
        # Save once per episode (the original also saved inside the done branch,
        # which duplicated this call)
        save_model(run_id, agent, best_score, total_reward)
        episode_rewards.append(total_reward)
    return episode_rewards
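# --- Illustrative sketch, not part of the original module ---
# train_dqn assumes the agent exposes get_action, remember,
# train_with_experience_replay, and update_exploration_strategy. The
# skeleton below shows one way those pieces can fit together: a bounded
# replay buffer, epsilon-greedy action selection, and exponential epsilon
# decay. The Q-function here is a plain linear model kept in numpy purely
# so the sketch is self-contained; all hyperparameter names and values
# are assumptions, not the project's actual params dict.
import random
from collections import deque

import numpy as np


class ReplayAgentSketch:
    def __init__(self, state_size, action_size):
        self.action_size = action_size
        self.memory = deque(maxlen=2500)   # bounded replay buffer
        self.gamma = 0.95                  # discount factor (assumed)
        self.epsilon = 1.0                 # start fully exploratory
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.batch_size = 64
        self.lr = 0.01
        # Linear Q-function: one weight row per action (stand-in for a network)
        self.weights = np.zeros((action_size, state_size))

    def get_action(self, state):
        # Epsilon-greedy: explore with probability epsilon, otherwise exploit
        if random.random() < self.epsilon:
            return random.randrange(self.action_size)
        return int(np.argmax(self.weights @ state))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def train_with_experience_replay(self):
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        for state, action, reward, next_state, done in batch:
            # Bellman target: r + gamma * max_a' Q(s', a'), truncated at terminal states
            target = reward
            if not done:
                target += self.gamma * np.max(self.weights @ next_state)
            # One gradient step on the squared TD error for the taken action
            td_error = target - self.weights[action] @ state
            self.weights[action] += self.lr * td_error * state

    def update_exploration_strategy(self, episode):
        # Decay epsilon toward its floor so later episodes exploit more
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay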
def test_dqn(env):
    agent = DQN(env, params)
    agent.load_model(sys.argv[1], sys.argv[2])
    state = env.reset()  # Reset the environment before the test run to start fresh
    state = np.reshape(state, (1, env.state_space))
    max_steps = 10000
    total_reward = 0
    for step in range(max_steps):
        action = agent.get_action(state)
        next_state, reward, done, _ = env.step(action)
        state = np.reshape(next_state, (1, env.state_space))
        total_reward += reward
        time.sleep(0.1)  # Slow the loop down so the run is watchable
        if done:
            print(f'Score: {total_reward}, steps: {step}')
            break
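# --- Hypothetical entry point, shown only as a usage sketch ---
# How this module might be driven from the command line. The CLI shape
# (a checkpoint id and score passed as argv, matching the load_model call
# in test_dqn) and the zero-argument HeadlessSnake constructor are
# assumptions, not confirmed by the original code.
if __name__ == '__main__':
    env = HeadlessSnake()
    if len(sys.argv) >= 3:
        # e.g. python agent.py <run_id> <score> to replay a saved model
        test_dqn(env)
    else:
        rewards = train_dqn(episodes=150, env=env, render_frequency=25)
        print(f'mean episode reward: {sum(rewards) / len(rewards):.2f}')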