예제 #1
0
class GameManager():
    """Drive training episodes of the card game against a learning agent.

    Owns a ``CardGameState`` and a ``DDQN`` agent, steps them against each
    other in :meth:`auto_play`, and reports a rolling win rate over the most
    recent ``GAME_HISTORY_SIZE`` finished episodes.
    """

    def __init__(self):
        # Init game state
        self.episode = 0.0
        self.win_counter = 0.0

        self.state = CardGameState(self)
        self.brain = DDQN()

        # Sum of the rewards received during the episode in progress.
        self.episode_reward = 0
        # 1/0 win flags of the most recent finished episodes, oldest first.
        self.game_history = list()

    def update(self, dt):
        """Do nothing; ``dt`` is accepted but ignored."""
        pass

    @staticmethod
    def _rolling_win_rate(history, window):
        """Trim ``history`` to its newest ``window`` entries and rate it.

        Args:
            history: list of 1 (win) / 0 (loss) flags, oldest first. It is
                trimmed in place so it never holds more than ``window`` items.
            window: maximum number of recent episodes to keep and average.

        Returns:
            Percentage (0-100) of wins among the retained entries, or 0.0
            when nothing is retained.
        """
        overflow = len(history) - window
        if overflow > 0:
            del history[:overflow]
        if not history:
            return 0.0
        # Divide by the number of entries actually kept; dividing by the
        # nominal window size after dropping an entry under-reports the rate.
        # float() keeps this true division under Python 2 as well.
        return np.sum(history) / float(len(history)) * 100.0

    def auto_play(self):
        """Play and train until ``MAX_EPISODES`` episodes have finished.

        Each step asks the agent for an action, applies it to the game state,
        and trains the agent on the resulting transition. When an episode
        ends, the rolling win rate is printed and passed to the agent's
        ``write_summary`` before the state is reset.
        """
        while self.episode < MAX_EPISODES:
            action = self.brain.get_action(self.state)

            # One-hot encoding of the chosen action over 3 slots.
            action_to_store = np.zeros(3)
            action_to_store[action] = 1

            self.state.process(action)
            # receive game result
            reward = self.state.reward
            done = self.state.terminal

            self.episode_reward += reward

            self.brain.train(self.state, self.state.s_t, action_to_store,
                             reward, self.state.s_t1, done)

            self.state.t += 1

            self.state.update()

            if done:
                self.episode += 1

                # An episode counts as a win only when its summed reward is
                # exactly 1.
                self.game_history.append(1 if self.episode_reward == 1 else 0)
                win_rate = self._rolling_win_rate(self.game_history,
                                                  GAME_HISTORY_SIZE)

                print("Episode {} | Win Rate = {}".format(
                    self.episode, win_rate))

                self.brain.write_summary(win_rate, self.episode)

                self.episode_reward = 0
                self.state.reset()
예제 #2
0
		# On termination: print the episode statistics, record the episode
		# return ep_r in total_reward, and exit the enclosing loop.
		if done:
			print('episode:',i_episode,'ep_r:',round(ep_r,2),'epsilon',round(RL.epsilon,2),'buffer_size:',RL.memory_count,'steps:',total_steps)
			total_reward.append(ep_r)
			break

		# Not done: s_ becomes the current state for the next iteration
		# (presumably the next observation produced above this fragment --
		# TODO confirm), and the running step counter advances.
		s = s_
		total_steps += 1

	# Evaluate every 10 episodes: run TEST rollouts of at most STEP steps
	# each, rendering every step, and report the reward averaged over the
	# TEST rollouts.
	if i_episode % 10 == 0:
		total_rewards = 0
		for i in range(TEST):
			state = env.reset()
			for j in range(STEP):
				env.render()
				action = RL.get_action(state) # direct action for test
				state,reward,done,_ = env.step(action)
				total_rewards += reward
				# Stop this rollout as soon as the environment reports done.
				if done:
					break
		# Mean summed reward per rollout (rewards accumulate across all TEST runs).
		ave_reward = total_rewards/TEST
		print ('episode: ',i_episode,'Evaluation Average Reward:',ave_reward)

# Ask the agent to plot its own cost curve first.
RL.plot_cost()

import matplotlib.pyplot as plt
import numpy as np

# Plot the recorded per-episode returns against the episode index.
plt.plot(
    np.arange(len(total_reward)),
    total_reward,
)
plt.xlabel('Episode ')
plt.ylabel('Total Reward')
plt.show()