def train(
    model_path='models/model.h5',
    opponent_policy=random_choice,
    num_episodes=1000,
    agent_params=None,
    **kwargs
):
    """Train a ``DQNAgent`` over a number of episodes and save the results.

    Each episode creates a fresh ``Environment`` in which the agent plays
    ``board.RED`` and takes the first turn. Every step the agent picks an
    epsilon-greedy action, stores the transition and runs one replay
    update. Episode outcome and length are appended to the stats dict and
    the live plot is refreshed after every episode.

    Args:
        model_path: Path passed to ``agent.save``. The model is saved on
            every 100th episode (starting with episode 0) and once more
            after the last episode.
        opponent_policy: Policy handed to ``Environment`` for the opponent.
        num_episodes: Number of episodes to run.
        agent_params: Keyword arguments forwarded to ``DQNAgent``. ``None``
            (the default) means no extra arguments.
        **kwargs: Accepted for call-site compatibility; not used here.

    Returns:
        None. Side effects: the saved model, a stats file written to
        ``stats/stats-<timestamp>.json`` and the matplotlib figure shown
        via ``plt.show()``.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # flattened source line; confirm against the intended layout.

    # Use a fresh dict per call instead of a shared mutable default.
    if agent_params is None:
        agent_params = {}

    stats = statistics.default_stats()
    plt_data = statistics.plot_stats(stats, data=None)

    agent = DQNAgent(**agent_params)

    for episode in range(num_episodes):
        print('Episode {}/{}'.format(episode, num_episodes))
        env = Environment(
            opponent_policy=opponent_policy,
            agent_color=board.RED,
            agent_first_turn=True,
        )

        done = False
        episode_length = 0
        while not done:
            state = env.get_state()
            action = agent.act_epsilon_greedy(state)
            next_state, reward, event = env.step(action)
            # Any event other than "still in game" ends the episode.
            done = event != board.EVENT_IN_GAME

            agent.remember(state, action, reward, next_state, done)
            agent.replay(stats=stats)

            if event == board.EVENT_WIN:
                print('Won Game!')

            episode_length += 1

        # `event` holds the terminal event of the episode at this point.
        stats['episode_results'].append(event)
        stats['episode_lengths'].append(episode_length)

        plt_data = statistics.plot_stats(stats, data=plt_data)
        # Short pause so the figure gets a chance to redraw.
        plt.pause(0.0001)

        # Periodic checkpoint.
        if episode % 100 == 0:
            agent.save(model_path)

    agent.save(model_path)

    saved_params = {'agent_params': agent_params, 'num_episodes': num_episodes}
    statistics.save_stats(
        stats,
        saved_params,
        "stats/stats-{}.json".format(time.strftime("%Y%m%d-%H%M%S")),
    )

    statistics.plot_stats(stats, data=plt_data)
    plt.show()
episode_reward = 0.0 episode_count = 0 obs = env.reset() pb = tqdm(range(-params.memory_initial, params.max_steps)) plotter = VisdomLinePlotter() reward_history = deque(maxlen=100) for i in pb: if args.render: env.render() idx = memory.store_obs(obs) state = memory.get_stacked_obs(idx) action = agent.act_epsilon_greedy(state, eps_schedule.get(i)) next_obs, reward, done, _ = env.step(action) episode_reward += reward memory.store_effect(idx, action, np.sign(reward), done) if done: next_obs = env.reset() episode_count += 1 reward_history.append(episode_reward) pb.set_description( f"episode: {episode_count}, reward: {episode_reward}, eps: {eps_schedule.get(i)*100:.2f}%" ) plotter.plot('episode reward', 'episode return', "Episode Return", episode_count, episode_reward) plotter.plot('episode reward', 'average return', "Episode Return",