Python DDQN.get_action 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: ddqn

클래스/타입: DDQN

메소드/함수: get_action

hotexamples.com에서의 예제들: 2

Python DDQN.get_action - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 ddqn.DDQN.get_action에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

DDQN(20)

act(3)

load_weights(3)

load_state_dict(3)

get_action(2)

eval(2)

parameters(2)

cuda(2)

decrease_exploration_rate(1)

eps_greedy(1)

choose_action(1)

get_weights(1)

greedy(1)

learn(1)

buffer_size(1)

append_data(1)

memorize(1)

예제 #1

파일 보기

class GameManager():
    """Run card-game episodes with a DDQN agent and track a rolling win rate.

    Attributes:
        episode: Number of finished episodes (stored as a float).
        win_counter: Initialised to 0.0; not modified by this class.
        state: The ``CardGameState`` being played.
        brain: The ``DDQN`` agent that chooses actions and is trained.
        episode_reward: Sum of the rewards of the episode in progress.
        game_history: 1/0 outcomes of the most recent finished episodes,
            holding at most ``GAME_HISTORY_SIZE`` entries.
    """

    def __init__(self):
        # Init game state
        self.episode = 0.0
        self.win_counter = 0.0

        self.state = CardGameState(self)
        self.brain = DDQN()

        self.episode_reward = 0
        self.game_history = list()

    def update(self, dt):
        """Do nothing; accepts ``dt`` and ignores it."""
        pass

    def auto_play(self):
        """Play and train until ``MAX_EPISODES`` episodes have finished.

        Each step asks the agent for an action, applies it to the game
        state and passes the resulting transition to ``brain.train``. When
        the state reports a terminal step, the episode is recorded, the
        rolling win rate is printed and written via ``brain.write_summary``,
        and the state is reset.
        """
        while self.episode < MAX_EPISODES:
            action = self.brain.get_action(self.state)

            # One-hot encode the chosen action for training.
            # NOTE(review): the length 3 is hard-coded; presumably the number
            # of available actions -- confirm against the DDQN definition.
            action_to_store = np.zeros(3)
            action_to_store[action] = 1

            self.state.process(action)
            # receive game result
            reward = self.state.reward
            done = self.state.terminal

            self.episode_reward += reward

            self.brain.train(self.state, self.state.s_t, action_to_store,
                             reward, self.state.s_t1, done)

            self.state.t += 1

            self.state.update()

            if done:
                self._finish_episode()

    def _finish_episode(self):
        """Record the finished episode, report the win rate and reset."""
        self.episode += 1

        # An episode counts as a win only when its total reward is exactly 1.
        self.game_history.append(1 if self.episode_reward == 1 else 0)
        win_rate = self._rolling_win_rate()

        print("Episode {} | Win Rate = {}".format(
            self.episode, win_rate))

        self.brain.write_summary(win_rate, self.episode)

        self.episode_reward = 0
        self.state.reset()

    def _rolling_win_rate(self):
        """Trim the history to the window and return the win percentage.

        The rate is computed over the entries actually kept, so the
        denominator always matches the number of results being summed.
        """
        excess = len(self.game_history) - GAME_HISTORY_SIZE
        if excess > 0:
            # Drop the oldest results so only the newest window remains.
            del self.game_history[:excess]

        if not self.game_history:
            return 0.0

        # float() keeps this a true division on Python 2 as well.
        return np.sum(self.game_history) / float(
            len(self.game_history)) * 100.0

예제 #2

파일 보기

		# Episode finished: log progress, record the episode reward and leave the step loop.
		if done:
			print('episode:',i_episode,'ep_r:',round(ep_r,2),'epsilon',round(RL.epsilon,2),'buffer_size:',RL.memory_count,'steps:',total_steps)
			total_reward.append(ep_r)
			break

		# Carry s_ over as the current state for the next step (s_ is assigned outside this excerpt).
		s = s_
		total_steps += 1

	# Evaluate every 10 episodes: run TEST rollouts of at most STEP steps each.
	if i_episode % 10 == 0:
		total_rewards = 0
		for i in range(TEST):
			state = env.reset()
			for j in range(STEP):
				env.render()
				action = RL.get_action(state) # direct action for test
				state,reward,done,_ = env.step(action)
				total_rewards += reward
				if done:
					break
		# Mean accumulated reward per evaluation rollout.
		ave_reward = total_rewards/TEST
		print ('episode: ',i_episode,'Evaluation Average Reward:',ave_reward)

# Ask the agent to draw its cost plot (plot_cost is defined outside this excerpt).
RL.plot_cost()

import matplotlib.pyplot as plt
import numpy as np

# Plot the reward recorded for each finished episode against its index.
episode_indices = np.arange(len(total_reward))
plt.plot(episode_indices, total_reward)
plt.xlabel('Episode ')
plt.ylabel('Total Reward')
plt.show()