Example #1
import numpy as np
import torch
import matplotlib.pyplot as plt
from IPython.display import clear_output

def displayImage(image, step, reward, value):
    # redraw the current frame with the step count, reward and critic value in the title
    clear_output(True)
    title = "step " + str(step) + " reward: " + str(reward) + " value: " + str(value[0][0])
    plt.title(title)
    im.set_data(image)   # 'im' and 'fig' are module-level matplotlib handles (see setup below)
    fig.canvas.draw()
    plt.pause(0.1)
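# NOTE (assumed setup): 'fig' and 'im' are not created anywhere in the original
# excerpt; a minimal sketch follows, with the 15x19 MiniPacman frame size as an
# assumption rather than something stated in the source.
fig = plt.figure()
im = plt.imshow(np.zeros((15, 19, 3)))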
    
# init environment ('mode', MiniPacman and ActorCritic come from the surrounding project)
env = MiniPacman(mode=mode, frame_cap=1000)

# load the pretrained actor-critic weights (map to CPU first, move to GPU below if available)
agentPath = "actor_critic_pacman_" + mode
actor_critic = ActorCritic(env.observation_space.shape, env.action_space.n)
pretrained_dict = torch.load(agentPath, map_location="cpu")
actor_critic.load_state_dict(pretrained_dict)

# CUDA is optional; fall back to CPU when no GPU is available
USE_CUDA = torch.cuda.is_available()
if USE_CUDA:
    actor_critic = actor_critic.cuda()

# init game
done = False
state = env.reset()
total_reward = 0
step = 1

while not done:
    current_state = torch.FloatTensor(state).unsqueeze(0)
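    # The excerpt breaks off here; a hedged sketch of how the evaluation loop
    # might continue. It assumes act() returns a 1x1 action tensor and that
    # calling the model returns (logits, value), matching displayImage above;
    # neither API is confirmed by the source.
    if USE_CUDA:
        current_state = current_state.cuda()
    action = actor_critic.act(current_state)
    _, value = actor_critic(current_state)

    next_state, reward, done, _ = env.step(action.cpu().data.numpy()[0, 0])
    total_reward += reward
    state = next_state

    # MiniPacman frames are channel-first; move channels last for imshow
    image = torch.FloatTensor(state).permute(1, 2, 0).cpu().numpy()
    displayImage(image, step, total_reward, value.cpu().data.numpy())
    step += 1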
Example #2
    if USE_CUDA:
        actor_critic = actor_critic.cuda()
        rollout.cuda()   # move the rollout storage buffers to the GPU as well

    rollout.states[0].copy_(state)  # seed the rollout buffer with the initial observations

    # per-env running episode reward, and the reward of the last finished episode
    episode_rewards = torch.zeros(num_envs, 1)
    final_rewards = torch.zeros(num_envs, 1)

    writer = new_writer(LABEL, arg)  # project-specific summary writer

    a2c_model_path = './trained_models/tmp_a2c_{}_{}'.format(
        arg.mode, arg.global_seed)
    if os.path.exists(a2c_model_path):
        print('Loading A2C model from', a2c_model_path)
        actor_critic.load_state_dict(torch.load(a2c_model_path))
    else:
        print('Training A2C model from scratch')
        for i_update in tqdm(range(arg.num_frames)):

            for step in range(num_steps):
                # sample an action for every parallel environment and step them together
                action = actor_critic.act(state.cuda())

                next_state, reward, done, _ = envs.step(
                    action.squeeze(1).cpu().data.numpy())

                # map raw env rewards through the mode-specific reward scheme
                reward = process_reward(reward, MODE_REWARDS[mode])
                reward = torch.FloatTensor(reward).unsqueeze(1)
                episode_rewards += reward
                # masks are 0 where an episode just ended, 1 otherwise
                masks = torch.FloatTensor(1 - np.array(done)).unsqueeze(1)
                final_rewards *= masks
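                # The excerpt ends mid-update; a sketch of the A2C bookkeeping
                # that typically follows, assuming a RolloutStorage-style
                # 'rollout' buffer with an insert() method (not confirmed by
                # the source):
                final_rewards += (1 - masks) * episode_rewards
                episode_rewards *= masks

                if USE_CUDA:
                    masks = masks.cuda()

                state = torch.FloatTensor(np.float32(next_state))
                rollout.insert(step, state, action.data, reward, masks)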