Example #1
def test_take_action_returns_nones_on_missing_values():
    test_seed = 3
    policy = TFPolicy(test_seed, basic_mock_brain(), basic_params())
    policy.evaluate = MagicMock(return_value={})
    policy.save_memories = MagicMock()
    brain_info_with_agents = BrainInfo([], [], [],
                                       agents=["an-agent-id"],
                                       local_done=[False])
    result = policy.get_action(brain_info_with_agents)
    assert result == ActionInfo(None, None, {})
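The get_action tests in this section (Examples #1, #2, and #5) rely on two small helper fixtures that are not shown. A plausible sketch of them, assuming a continuous-action mock brain and a minimal parameter dict; the exact fields are an assumption, not the library's code:

from unittest.mock import MagicMock

def basic_mock_brain():
    # Assumed fixture: a mock brain exposing only what TFPolicy reads in these tests.
    mock_brain = MagicMock()
    mock_brain.vector_action_space_type = "continuous"
    return mock_brain

def basic_params():
    # Assumed fixture: minimal trainer parameters for constructing a TFPolicy.
    return {"use_recurrent": False, "model_path": "my/path"}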
Example #2
def test_take_action_returns_action_info_when_available():
    test_seed = 3
    policy = TFPolicy(test_seed, basic_mock_brain(), basic_params())
    policy_eval_out = {
        "action": np.array([1.0], dtype=np.float32),
        "memory_out": np.array([[2.5]], dtype=np.float32),
        "value": np.array([1.1], dtype=np.float32),
    }
    policy.evaluate = MagicMock(return_value=policy_eval_out)
    brain_info_with_agents = BrainInfo([], [], [],
                                       agents=["an-agent-id"],
                                       local_done=[False])
    result = policy.get_action(brain_info_with_agents)
    expected = ActionInfo(policy_eval_out["action"], policy_eval_out["value"],
                          policy_eval_out)
    assert result == expected
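Taken together, Examples #1 and #2 pin down how get_action packs the output of evaluate() into an ActionInfo. A minimal sketch of that behaviour, assuming get_action simply forwards the run output and fills None for missing keys; this is an illustration, not the TFPolicy implementation:

def get_action_sketch(policy, brain_info):
    # Illustration only: forward evaluate()'s dict into ActionInfo(action, value, outputs).
    run_out = policy.evaluate(brain_info)
    return ActionInfo(run_out.get("action"), run_out.get("value"), run_out)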
Example #3
def step_result_to_brain_info(
    step_result: BatchedStepResult,
    group_spec: AgentGroupSpec,
    agent_id_prefix: int = None,
) -> BrainInfo:
    n_agents = step_result.n_agents()
    vis_obs_indices = []
    vec_obs_indices = []
    for index, observation in enumerate(step_result.obs):
        if len(observation.shape) == 2:
            vec_obs_indices.append(index)
        elif len(observation.shape) == 4:
            vis_obs_indices.append(index)
        else:
            raise UnityEnvironmentException(
                "Invalid input received from the environment, the observation should "
                "either be a vector of float or a PNG image")
    if len(vec_obs_indices) == 0:
        vec_obs = np.zeros((n_agents, 0), dtype=np.float32)
    else:
        vec_obs = np.concatenate([step_result.obs[i] for i in vec_obs_indices],
                                 axis=1)
    vis_obs = [step_result.obs[i] for i in vis_obs_indices]
    mask = np.ones((n_agents, np.sum(group_spec.action_size)),
                   dtype=np.float32)
    if group_spec.is_action_discrete():
        mask = np.ones((n_agents, np.sum(group_spec.discrete_action_branches)),
                       dtype=np.float32)
        if step_result.action_mask is not None:
            mask = 1 - np.concatenate(step_result.action_mask, axis=1)
    if agent_id_prefix is None:
        agent_ids = [str(ag_id) for ag_id in list(step_result.agent_id)]
    else:
        agent_ids = [
            f"${agent_id_prefix}-{ag_id}" for ag_id in step_result.agent_id
        ]
    return BrainInfo(
        vis_obs,
        vec_obs,
        list(step_result.reward),
        agent_ids,
        list(step_result.done),
        list(step_result.max_step),
        mask,
    )
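The branching on observation.shape above encodes the layout the converter expects: 2-D arrays are per-agent vector observations, 4-D arrays are per-agent images. A small NumPy illustration of that convention, with made-up shapes:

import numpy as np

obs = [
    np.zeros((3, 5), dtype=np.float32),          # vector obs: (n_agents, vec_size)
    np.zeros((3, 84, 84, 3), dtype=np.float32),  # visual obs: (n_agents, h, w, channels)
]
vector_obs = [o for o in obs if o.ndim == 2]
visual_obs = [o for o in obs if o.ndim == 4]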
Example #4
 def construct_curr_info(self, next_info: BrainInfo) -> BrainInfo:
     """
     Constructs a BrainInfo which contains the most recent previous experiences for all agents
     which correspond to the agents in a provided next_info.
     :BrainInfo next_info: A t+1 BrainInfo.
     :return: curr_info: Reconstructed BrainInfo to match agents of next_info.
     """
     visual_observations: List[List[Any]] = [
         [] for _ in next_info.visual_observations
     ]  # TODO add types to brain.py methods
     vector_observations = []
     rewards = []
     local_dones = []
     max_reacheds = []
     agents = []
     action_masks = []
     for agent_id in next_info.agents:
         agent_brain_info = self.processing_buffer[agent_id].last_brain_info
         if agent_brain_info is None:
             agent_brain_info = next_info
         agent_index = agent_brain_info.agents.index(agent_id)
         for i in range(len(next_info.visual_observations)):
             visual_observations[i].append(
                 agent_brain_info.visual_observations[i][agent_index])
         vector_observations.append(
             agent_brain_info.vector_observations[agent_index])
         rewards.append(agent_brain_info.rewards[agent_index])
         local_dones.append(agent_brain_info.local_done[agent_index])
         max_reacheds.append(agent_brain_info.max_reached[agent_index])
         agents.append(agent_brain_info.agents[agent_index])
         action_masks.append(agent_brain_info.action_masks[agent_index])
     curr_info = BrainInfo(
         visual_observations,
         vector_observations,
         rewards,
         agents,
         local_dones,
         max_reacheds,
         action_masks,
     )
     return curr_info
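The per-agent lookup above is needed because the same agent can sit at a different row in two consecutive BrainInfos, so fields are fetched by agent id rather than by position. A plain-Python illustration of that re-indexing, with made-up ids and rewards:

prev_agents = ["agent-b", "agent-a"]
prev_rewards = [0.5, 1.0]
next_agents = ["agent-a", "agent-b"]

# Look each agent up by id in the previous step, matching next_info's order.
rewards_in_next_order = [prev_rewards[prev_agents.index(a)] for a in next_agents]
assert rewards_in_next_order == [1.0, 0.5]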
Example #5
def test_take_action_returns_empty_with_no_agents():
    test_seed = 3
    policy = TFPolicy(test_seed, basic_mock_brain(), basic_params())
    no_agent_brain_info = BrainInfo([], [], [], agents=[])
    result = policy.get_action(no_agent_brain_info)
    assert result == ActionInfo([], [], None)
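Example #5 covers the empty-input path. A minimal sketch of the guard it implies, assuming get_action short-circuits before calling evaluate() when the BrainInfo contains no agents; again an illustration, not the library code:

def get_action_with_guard(policy, brain_info):
    # Assumed early return: nothing to evaluate when no agents are present.
    if len(brain_info.agents) == 0:
        return ActionInfo([], [], None)
    run_out = policy.evaluate(brain_info)
    return ActionInfo(run_out.get("action"), run_out.get("value"), run_out)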