# Evaluate an ensemble of DQN heads: each head proposes an action index and
# the environment is stepped with the most frequently proposed one.
dqn_heads = []
act_index = np.zeros(num_heads, dtype=int)
epsilon_eval = 0.05
done = False

# Restore one network per head from its saved state dict.
for i in range(num_heads):
    the_model = torch.load('./tmp/model_epoch_200/model' + str(i) + '.pth')
    dqn_heads.append(DQN(num_actions))
    dqn_heads[i].load_state_dict(the_model)

# Run the whole rollout under no_grad so that no autograd graph is recorded
# for the forward passes; wrapping only the tensor allocation has no effect
# on the later calls.
with torch.no_grad():
    var_phi = autograd.Variable(torch.Tensor(1, 4, 84, 84))

    # NOTE(review): phi_next is returned by atari.step() but never fed back
    # into var_phi, so every head is always queried with the same input
    # tensor. The type of phi_next is not visible here -- confirm the intended
    # state update before changing it.
    while not done:
        for i, head in enumerate(dqn_heads):
            # The original call placed a positional argument after
            # `epsilon=...`, which is a SyntaxError. The arguments are now
            # passed positionally in the order they were written.
            # NOTE(review): confirm this matches sample_action's parameter
            # order (env, model, phi, epsilon, num_actions).
            act_index[i] = sample_action(
                atari, head, var_phi, epsilon_eval, num_actions
            )

        # Majority vote over the heads; ties resolve to the lowest index.
        act_index_vote = np.bincount(act_index).argmax()
        phi_next, r, done = atari.step(VALID_ACTION[act_index_vote])
        atari.display()
        time.sleep(0.01)
import torch
import torch.autograd as autograd

from model import DQN
from pong import Pong
from utils import sample_action

# Play one Pong episode with a single trained DQN and render every step.
the_model = torch.load('./tmp/model.pth')
dqn = DQN()
dqn.load_state_dict(the_model)

pong = Pong()
done = False

# Maps the index returned by sample_action to the action passed to pong.step().
VALID_ACTION = [0, 2, 5]

# `volatile=True` was removed in PyTorch 0.4 and is now ignored, so it no
# longer disables gradient tracking. torch.no_grad() is the replacement and
# has to wrap the forward passes themselves.
with torch.no_grad():
    var_phi = autograd.Variable(torch.Tensor(1, 4, 84, 84))

    # NOTE(review): phi_next is returned by pong.step() but never fed back
    # into var_phi, so the network is always queried with the same input
    # tensor. The type of phi_next is not visible here -- confirm the intended
    # state update before changing it.
    while not done:
        act_index = sample_action(pong, dqn, var_phi)
        phi_next, r, done = pong.step(VALID_ACTION[act_index])
        pong.display()