# --- Run a single multi-agent episode --------------------------------------
# Accumulators: mean per-agent score, number of agent decisions taken, and
# whether any collision was observed during the episode.
score, steps_taken, collision = 0, 0, False

# Build the initial normalized observation for every agent.
# NOTE(review): zero-centering appears to be DQN-specific — confirm upstream.
for handle in range(flags.num_agents):
    agent_obs[handle] = normalize_observation(
        obs[handle], flags.tree_depth, zero_center=flags.agent_type == 'dqn')
    agent_obs_buffer[handle] = agent_obs[handle].copy()

# Step the environment until the step budget is exhausted.
for step in range(max_steps):
    update_values = [False] * flags.num_agents
    action_dict = {}
    for handle in range(flags.num_agents):
        if info['action_required'][handle]:
            # Agent must decide this tick: query the policy (eps-greedy).
            action_dict[handle] = agent.act(agent_obs[handle], eps=eps)
            update_values[handle] = True
            steps_taken += 1
        else:
            # No decision required — emit the no-op action.
            action_dict[handle] = 0

    # Environment step
    obs, rewards, done, info = env.step(action_dict)
    score += sum(rewards.values()) / flags.num_agents

    # Force episode termination on the last step; record any collision.
    if step == max_steps - 1:
        done['__all__'] = True
    if any(is_collision(handle) for handle in obs):
        collision = True
# --- Evaluate a trained DQN agent on MuJoCo Humanoid-v2 --------------------
# Make the parent directory importable so the project-local `dqn` package
# resolves, then load the trained network and render evaluation episodes.
lib_dir = os.path.abspath(os.path.join(path, os.pardir))
sys.path.insert(1, lib_dir)
from dqn.agent import Agent

ENV_NAME = 'Humanoid-v2'

env = gym.make(ENV_NAME)
np.random.seed(0)
env.seed(0)

# Humanoid-v2 action-space dimension; used consistently below instead of a
# duplicated hard-coded 17.
nb_actions = 17

# Restore the trained weights.  map_location='cpu' lets a checkpoint saved
# on a GPU machine load on a CPU-only machine instead of raising.
agent = Agent(state_size=376, action_size=nb_actions, seed=0)
agent.qnetwork_local.load_state_dict(
    torch.load('checkpoint.pth', map_location='cpu'))

# Run 15 rendered evaluation episodes, printing the per-step reward.
for i in range(15):
    state = env.reset()
    while True:
        env.render()
        action = agent.act(state)
        state, reward, done, _ = env.step(action)
        print(reward)
        if done:
            break
    print("Iteration ", i)

env.close()