Example #1
    

import numpy as np
import torch
import matplotlib.pyplot as plt
from IPython.display import clear_output

def displayImage(image, step, reward, value):
    # Redraw the current frame; show step, reward and value estimate in the title.
    clear_output(True)
    title = "step: " + str(step) + " reward: " + str(reward) + " value: " + str(value[0][0])
    plt.title(title)
    im.set_data(image)       # `im` and `fig` are global matplotlib handles
    fig.canvas.draw()
    plt.pause(0.1)
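# Assumption (not in the original snippet): the global `fig`/`im` handles used by
# displayImage are created once up front; the placeholder frame size is a guess.
fig = plt.figure()
im = plt.imshow(np.zeros((15, 19, 3)), animated=True)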
    
# init environment
env = MiniPacman(mode=mode, frame_cap=1000)

# load model
agentPath = "actor_critic_pacman_" + mode
actor_critic = ActorCritic(env.observation_space.shape, env.action_space.n)
pretrained_dict = torch.load(agentPath)
actor_critic.load_state_dict(pretrained_dict)

if USE_CUDA:    
    actor_critic = actor_critic.cuda()



# init game
done = False
state = env.reset()
total_reward = 0
step = 1
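# Hedged sketch (not part of the original snippet): a play loop that feeds each
# frame to displayImage. It assumes the ActorCritic forward pass returns
# (logits, value), as in the reference I2A code; the exact API may differ.
while not done:
    current_state = torch.FloatTensor(state).unsqueeze(0)
    if USE_CUDA:
        current_state = current_state.cuda()
    with torch.no_grad():
        logit, value = actor_critic(current_state)
    action = torch.distributions.Categorical(logits=logit).sample()
    state, reward, done, _ = env.step(int(action.item()))
    total_reward += reward
    displayImage(np.transpose(state, (1, 2, 0)), step, total_reward, value.cpu().numpy())  # CHW -> HWC; layout is an assumption
    step += 1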

Example #2
    state = envs.reset()
    state = FloatTensor(state)
    state_shape = envs.observation_space.shape

    writer = new_writer(LABEL, arg)

    state_shape = envs.observation_space.shape
    num_actions = envs.action_space.n
    num_rewards = len(MODE_REWARDS[arg.mode])

    env_model = EnvModel(envs.observation_space.shape,
                         num_pixels,
                         num_rewards=num_rewards)
    if USE_CUDA:
        env_model.cuda()
    distill_policy = ActorCritic(envs.observation_space.shape,
                                 envs.action_space.n)
    distill_optimizer = optim.Adam(distill_policy.parameters())

    ei_i2a = EnvIntegrated_I2A(state_shape,
                               num_actions,
                               hidden_size=256,
                               full_rollout=True,
                               env_model=env_model,
                               mode_reward=MODE_REWARDS[arg.mode])
    imagination = ImaginationCore(arg.rollout_depth,
                                  state_shape,
                                  num_actions,
                                  num_rewards,
                                  ei_i2a.env_model,
                                  distill_policy,
                                  full_rollout=True)
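    # Hedged sketch (not part of the original snippet): a typical I2A policy
    # distillation step, assuming ei_i2a and distill_policy both map a batch of
    # states to (logits, value); the 0.01 weight is an assumption.
    import torch.nn.functional as F

    def distill_step(states):
        logit, _ = ei_i2a(states)                  # imagination-augmented policy
        distill_logit, _ = distill_policy(states)  # cheap model-free copy
        target = F.softmax(logit, dim=1).detach()
        loss = -(target * F.log_softmax(distill_logit, dim=1)).sum(1).mean()
        distill_optimizer.zero_grad()
        (0.01 * loss).backward()
        distill_optimizer.step()
        return loss.item()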
Example #3
    #a2c hyperparams:
    gamma = 0.99
    entropy_coef = 0.01
    value_loss_coef = 0.5
    max_grad_norm = 0.5
    num_steps = 5
    num_frames = int(10e6)

    #rmsprop hyperparams:
    lr    = 7e-4
    eps   = 1e-5
    alpha = 0.99

    #Init a2c and rmsprop
    agent1 = ActorCritic(state_shape, num_actions)
    agent2 = ActorCritic(state_shape, num_actions)
    optimizer1 = optim.RMSprop(agent1.parameters(), lr, eps=eps, alpha=alpha)
    optimizer2 = optim.RMSprop(agent2.parameters(), lr, eps=eps, alpha=alpha)

    agent1 = make_cuda(agent1)
    agent2 = make_cuda(agent2)

    rollout1 = RolloutStorage(num_steps, num_envs, state_shape)
    rollout2 = RolloutStorage(num_steps, num_envs, state_shape)

    if USE_CUDA:        
        rollout1.cuda()
        rollout2.cuda()

    all_rewards1 = []
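    # Hedged sketch (not part of the original snippet): how the hyperparameters
    # above are usually combined into the A2C objective for one of the agents.
    # `returns`, `values`, `log_probs` and `entropy` are assumed to come from
    # the corresponding RolloutStorage.
    def a2c_update(agent, optimizer, returns, values, log_probs, entropy):
        advantages = returns - values
        value_loss = advantages.pow(2).mean()
        action_loss = -(advantages.detach() * log_probs).mean()
        loss = action_loss + value_loss_coef * value_loss - entropy_coef * entropy
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(agent.parameters(), max_grad_norm)
        optimizer.step()
        return loss.item()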
Example #4
    envs = SubprocVecEnv(envs)    
    
    state_shape = envs.observation_space.shape 
    num_actions = envs.action_space.n    
    
    #a2c hyperparams:
    gamma = 0.99
    entropy_coef = 0.01
    value_loss_coef = 0.5
    max_grad_norm = 0.5
    num_steps = 10
    num_frames = int(1e6)
    

    #Init a2c and Adam optimizer
    actor_critic = ActorCritic(state_shape, num_actions)            
    optimizer = optim.Adam(actor_critic.parameters())
        
    actor_critic = make_cuda(actor_critic)

    rollout = RolloutStorage(num_steps, num_envs, state_shape)
   
    if USE_CUDA:        
        rollout.cuda()

    all_rewards = []
    all_losses  = []    

    state = envs.reset() 
    
    state = torch.FloatTensor(np.float32(state))
        
Example #5

    #a2c hyperparams:
    gamma = 0.99
    entropy_coef = 0.01
    value_loss_coef = 0.5
    max_grad_norm = 0.5
    num_steps = 5
    num_frames = int(1e6)

    #rmsprop hyperparams:
    lr    = 7e-4
    eps   = 1e-5
    alpha = 0.99

    #Init a2c and rmsprop
    actor_critic = ActorCritic(state_shape, num_actions)
    optimizer = optim.RMSprop(actor_critic.parameters(), lr, eps=eps, alpha=alpha)
    
    if USE_CUDA:
        actor_critic = actor_critic.cuda()



    rollout = RolloutStorage(num_steps, num_envs, state_shape)
    if USE_CUDA:        
        rollout.cuda()

    all_rewards = []
    all_losses  = []    
    all_step_scores = []
Example #7
    # Upscale each 10x10 frame to 80x80 by repeating every pixel over an 8x8 block.
    shape = list(input.shape)
    shape[-1] = 80
    shape[-2] = 80
    upscaled = np.zeros(shape)
    for i, j in np.ndindex(upscaled.shape[-2:]):
        upscaled[..., i, j] = input[..., i // 8, j // 8]
    return upscaled

if __name__ == '__main__': 
    
    # init environment
    env = Key_Collect(max_steps=50, num_keys=num_keys)
    
    # load model
    actor_critic = ActorCritic((3,10,10), env.action_space.n)
    pretrained_dict = torch.load(agentPath, map_location='cpu')
    actor_critic.load_state_dict(pretrained_dict)

    actor_critic = make_cuda(actor_critic)

    # init game
    done = False
    state = env.reset()
    
    step = 1
    total_reward = 0
    
    while True:    
        current_state = torch.FloatTensor(state)
        
Example #8
    num_steps = arg.num_steps  # number of steps per update
    num_frames = int(arg.num_frames)  # number of training frames
    set_random_seed(global_seed)  # global random seed
    #rmsprop hyperparams:
    #lr    = 7e-4 # default
    lr = arg.learning_rate
    eps = 1e-5
    alpha = 0.99

    envs = SubprocVecEnv([make_env(env_id, i) for i in range(num_envs)])
    state = envs.reset()
    state = torch.FloatTensor(np.float32(state))
    if USE_CUDA:
        state = state.cuda()
    state_shape = envs.observation_space.shape

    #Init a2c and rmsprop
    actor_critic = ActorCritic(envs.observation_space.shape,
                               envs.action_space.n)
    optimizer = optim.RMSprop(actor_critic.parameters(),
                              lr,
                              eps=eps,
                              alpha=alpha)
    # Init rollout storage
    rollout = RolloutStorage(num_steps, num_envs, envs.observation_space.shape)
    if USE_CUDA:
        actor_critic = actor_critic.cuda()
        rollout.cuda()

    rollout.states[0].copy_(state)

    episode_rewards = torch.zeros(num_envs, 1)
    final_rewards = torch.zeros(num_envs, 1)
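    # Hedged sketch (not part of the original snippet): how these two tensors are
    # typically updated on every environment step of the A2C loop. `reward` and
    # `done` are assumed to come from envs.step(); the helper name is illustrative.
    def update_reward_stats(reward, done):
        reward = torch.FloatTensor(reward).unsqueeze(1)
        masks = torch.FloatTensor(1.0 - np.float32(done)).unsqueeze(1)
        episode_rewards.add_(reward)                        # running return per env
        final_rewards.mul_(masks)                           # keep envs still running
        final_rewards.add_((1 - masks) * episode_rewards)   # record finished episodes
        episode_rewards.mul_(masks)                         # reset finished episodes
        return masks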