Example #1
0
# Evaluate an ensemble of DQN heads: at every step each head proposes an
# action index, and the environment is advanced with the majority-vote action.
# `np`, `torch`, `autograd`, `time`, `DQN`, `sample_action`, `atari`,
# `num_heads`, `num_actions` and `VALID_ACTION` are not defined in this
# snippet and must already be in scope.
dqn_heads = []
act_index = np.zeros(num_heads, dtype=int)  # latest action index per head
epsilon_eval = 0.05  # forwarded to sample_action as `epsilon`
done = False

# One checkpoint per head: model0.pth, model1.pth, ...
for i in range(num_heads):
    the_model = torch.load('./tmp/model_epoch_200/model' + str(i) + '.pth')
    dqn_heads.append(DQN(num_actions))
    dqn_heads[i].load_state_dict(the_model)

# Keep the whole rollout inside no_grad(): wrapping only the placeholder
# allocation would leave the forward passes in the loop tracking gradients.
with torch.no_grad():
    var_phi = autograd.Variable(torch.Tensor(1, 4, 84, 84))

    while not done:
        for i in range(num_heads):
            # A positional argument may not follow a keyword argument, so
            # num_actions is passed by keyword as well.
            # NOTE(review): assumes sample_action names this parameter
            # `num_actions` -- confirm against its definition.
            act_index[i] = sample_action(atari,
                                         dqn_heads[i],
                                         var_phi,
                                         epsilon=epsilon_eval,
                                         num_actions=num_actions)

        # Majority vote across heads; argmax resolves ties to the smallest
        # action index.
        act_index_vote = np.bincount(act_index).argmax()

        phi_next, r, done = atari.step(VALID_ACTION[act_index_vote])
        atari.display()
        time.sleep(0.01)
Example #2
0
from model import DQN
from pong import Pong
import torch
import torch.autograd as autograd
from utils import sample_action


# Load a trained DQN from ./tmp/model.pth and let it play Pong, rendering
# every step, until pong.step() reports done.
the_model = torch.load('./tmp/model.pth')

dqn = DQN()
dqn.load_state_dict(the_model)

pong = Pong()
done = False

# Environment action codes; sample_action returns an index into this list.
VALID_ACTION = [0, 2, 5]

# `volatile=True` was removed in PyTorch 0.4 (it only emits a warning and has
# no effect); torch.no_grad() is the supported way to disable gradient
# tracking during inference.
with torch.no_grad():
    var_phi = autograd.Variable(torch.Tensor(1, 4, 84, 84))

    while not done:
        act_index = sample_action(pong, dqn, var_phi)

        phi_next, r, done = pong.step(VALID_ACTION[act_index])
        pong.display()