Beispiel #1
0
    envs = VecEnv([
        make_visual_env('./scenarios/deathmatch_maze.cfg')
        for i in range(num_envs)
    ])
else:
    envs = VecEnv([
        make_env(0, './scenarios/deathmatch_maze.cfg') for i in range(num_envs)
    ])

# Actions are plain integer indices: one per entry of the vectorised
# environment's `action_space_shape`. Every step draws one index per
# environment with `choice` (presumably `random.choice` - confirm import).
actions = range(envs.action_space_shape)
episode_num = 0

# Runs forever: each pass of the outer loop plays at most 1000 steps,
# then resets the environments and pauses briefly.
while True:
    print('Episode #', episode_num)
    for _step in range(1000):
        action_array = [choice(actions) for _ in range(num_envs)]
        obs, reward, done, info = envs.step(action_array)
        # NOTE(review): `done` is used as a single truth value here; if
        # `envs.step` returns one flag per environment this only works for
        # num_envs == 1 - confirm against VecEnv.
        if not done:
            # Episode still running: throttle the loop and keep stepping.
            sleep(0.01)
            continue
        # Episode finished: report index 2 of environment 0's game
        # variables (printed as the kill count) and count the episode.
        game_vars = envs.get_game_variables(0)
        print('Kills : ', game_vars[2])
        episode_num += 1
        break
    # Reached both after a finished episode and after 1000 steps without
    # `done`; in the latter case `episode_num` is left unchanged.
    envs.reset()
    sleep(0.1)
Beispiel #2
0
# Evaluation loop: step the environment with `actor_critic` (called with
# deterministic=True) until `num_episodes` episodes have finished, while
# accumulating total reward and total kills for the final averages.
while episode_cnt < num_episodes:
    # sleep(0.01)
    # Debug output of `actor_critic.get_probs` for the current observation.
    print(actor_critic.get_probs(Variable(current_obs, volatile=True)))
    value, action = actor_critic.act(Variable(current_obs, volatile=True),
                                     deterministic=True)
    cpu_actions = action.data.cpu().numpy()

    print('Action:', [cpu_actions[0]])

    # Observe reward and next obs; only the first action/reward entry is
    # used, i.e. a single environment is being driven here.
    obs, reward, done, _ = envs.step([cpu_actions[0]])
    episode_reward += reward[0]

    if done:
        total_reward += episode_reward
        episode_cnt += 1
        episode_reward = 0.0
        episode_game_variables = envs.get_game_variables(0)
        # Identity check against None: `!= None` would be evaluated
        # element-wise if the game variables come back as an array, and
        # the resulting multi-element truth test would raise.
        if episode_game_variables is not None:
            # Index 2 of the game variables is accumulated as the kill count.
            total_kills += episode_game_variables[2]
        obs = envs.reset()
        # The checkpoint is re-read from disk after every finished episode.
        # NOTE(review): presumably to pick up a model that is still being
        # trained and re-saved concurrently - confirm this is intended.
        actor_critic = torch.load(
            os.path.join(args.load_dir, args.env_name + ".pt"))
        actor_critic.eval()

    update_current_obs(obs)

# Averages are taken over the requested episode count; the reward average
# is rounded to the nearest integer, the kill average is not.
print('Avg reward:', round(total_reward / num_episodes))
print('Avg kills:', (total_kills / num_episodes))
envs.close()