Code example #1
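This fragment runs one evaluation episode in a multi-agent environment. The normalize_observation / tree_depth / action_required pattern matches the Flatland rail-environment DQN baselines: each agent gets a normalized tree observation, is queried epsilon-greedily whenever the environment flags that an action is required, and the loop tracks the mean score, the number of decisions taken, and whether a collision occurred.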
# Assumes env, agent, flags, eps and max_steps already exist, that obs and
# info came from env.reset(), and that agent_obs / agent_obs_buffer are
# pre-allocated lists with one slot per agent.
score, steps_taken, collision = 0, 0, False

# Build the initial normalized observation for each agent
for a in range(flags.num_agents):
    agent_obs[a] = normalize_observation(
        obs[a], flags.tree_depth, zero_center=flags.agent_type == 'dqn')
    agent_obs_buffer[a] = agent_obs[a].copy()

# Run an episode
for step in range(max_steps):
    update_values = [False] * flags.num_agents
    action_dict = {}

    # Pick an action for every agent that needs one this step
    for a in range(flags.num_agents):
        if info['action_required'][a]:
            action_dict[a] = agent.act(agent_obs[a], eps=eps)
            # action_dict[a] = np.random.randint(5)  # swap in for a random baseline
            update_values[a] = True
            steps_taken += 1
        else:
            action_dict[a] = 0  # no decision needed: do nothing

    # Environment step
    obs, rewards, done, info = env.step(action_dict)
    score += sum(rewards.values()) / flags.num_agents

    # Check for collisions and episode completion
    if step == max_steps - 1:
        done['__all__'] = True  # force termination once the step budget runs out
    if any(is_collision(a) for a in obs):
        collision = True
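As written, the loop only collects experience. Below is a minimal sketch of the learning update that would typically sit at the bottom of the per-step loop, assuming the agent exposes a step(state, action, reward, next_state, done) method that feeds a replay buffer, and that an agent_action_buffer list was pre-allocated alongside agent_obs_buffer (agent.step and agent_action_buffer are assumed names, not taken from the code above):

# Sketch only: belongs inside the per-step loop above.
for a in range(flags.num_agents):
    if update_values[a] or done[a]:
        # Store the transition since this agent's last decision and learn from it.
        # agent.step(...) and agent_action_buffer are assumptions, not from the snippet.
        agent.step(agent_obs_buffer[a], agent_action_buffer[a],
                   rewards[a], agent_obs[a], done[a])
        agent_obs_buffer[a] = agent_obs[a].copy()
        agent_action_buffer[a] = action_dict[a]

    # Refresh the normalized observation for the next decision
    if obs[a] is not None:
        agent_obs[a] = normalize_observation(
            obs[a], flags.tree_depth, zero_center=flags.agent_type == 'dqn')

if done['__all__']:
    break  # stop the step loop once every agent is finished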
Code example #2
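This script reloads a trained agent and replays it in MuJoCo's Humanoid-v2 environment for 15 rendered episodes. Note that Humanoid-v2 has a continuous 17-dimensional action space, so the Agent here is presumably returning a 17-element action vector rather than a single discrete index.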
import os
import sys

import gym
import numpy as np
import torch

# Make the sibling dqn/ package importable (assumes this script lives one
# directory below the package root; the original snippet left `path` undefined)
path = os.path.dirname(os.path.abspath(__file__))
lib_dir = os.path.abspath(os.path.join(path, os.pardir))
sys.path.insert(1, lib_dir)

from dqn.agent import Agent

ENV_NAME = 'Humanoid-v2'
env = gym.make(ENV_NAME)

np.random.seed(0)
env.seed(0)
nb_actions = env.action_space.shape[0]  # Humanoid-v2: 17 actuators
agent = Agent(state_size=env.observation_space.shape[0],  # 376 for Humanoid-v2
              action_size=nb_actions, seed=0)

# Load the trained weights (mapped to CPU so the demo runs without a GPU)
agent.qnetwork_local.load_state_dict(
    torch.load('checkpoint.pth', map_location=torch.device('cpu')))

for i in range(15):
    state = env.reset()
    episode_reward = 0.0
    while True:
        env.render()
        action = agent.act(state)
        state, reward, done, _ = env.step(action)
        episode_reward += reward
        if done:
            break
    print("Iteration", i, "reward:", episode_reward)

env.close()
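The script relies on just two pieces of the Agent interface: the qnetwork_local attribute whose weights are restored from checkpoint.pth, and an act() method. Below is a minimal sketch of an act() consistent with both call sites (agent.act(state) here, agent.act(obs, eps=eps) in example #1). The epsilon-greedy body is the common discrete-DQN pattern, not necessarily this repo's actual code; a discrete argmax like this would still have to be mapped onto Humanoid-v2's continuous action vector, which the replay script glosses over.

# Hedged sketch of Agent.act(); only the signature is implied by the scripts above.
import random

import numpy as np
import torch

def act(self, state, eps=0.0):
    """Epsilon-greedy action selection over the local Q-network."""
    state_t = torch.from_numpy(np.asarray(state, dtype=np.float32)).unsqueeze(0)
    self.qnetwork_local.eval()
    with torch.no_grad():
        action_values = self.qnetwork_local(state_t)
    self.qnetwork_local.train()

    if random.random() > eps:
        return int(np.argmax(action_values.cpu().numpy()))  # exploit
    return random.randrange(self.action_size)               # explore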