Example #1
def test_batched_step_result_from_proto_raises_on_nan():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
    ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
    with pytest.raises(RuntimeError):
        batched_step_result_from_proto(ap_list, group_spec)
Example #2
def create_mock_group_spec(
    number_visual_observations=0,
    vector_action_space_type="continuous",
    vector_observation_space_size=3,
    vector_action_space_size=None,
):
    """
    Creates a mock BrainParameters object with parameters.
    """
    # Avoid using mutable object as default param
    act_type = ActionType.DISCRETE
    if vector_action_space_type == "continuous":
        act_type = ActionType.CONTINUOUS
        if vector_action_space_size is None:
            vector_action_space_size = 2
        else:
            vector_action_space_size = vector_action_space_size[0]
    else:
        if vector_action_space_size is None:
            vector_action_space_size = (2, )
        else:
            vector_action_space_size = tuple(vector_action_space_size)
    obs_shapes = [(vector_observation_space_size, )]
    for _ in range(number_visual_observations):
        obs_shapes += [(8, 8, 3)]
    return AgentGroupSpec(obs_shapes, act_type, vector_action_space_size)
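A quick illustration of the two branches above (same names as in the snippet):

# Default call: continuous actions, action size 2, one vector observation.
continuous_spec = create_mock_group_spec()

# Discrete call: the size argument is coerced to a tuple of branch sizes.
discrete_spec = create_mock_group_spec(
    vector_action_space_type="discrete",
    vector_action_space_size=[3, 2],
)  # -> AgentGroupSpec([(3, )], ActionType.DISCRETE, (3, 2))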
Example #3
def test_take_action_returns_empty_with_no_agents():
    test_seed = 3
    policy = FakePolicy(test_seed, basic_mock_brain(), basic_params())
    # Doesn't really matter what this is
    dummy_groupspec = AgentGroupSpec([(1, )], "continuous", 1)
    no_agent_step = BatchedStepResult.empty(dummy_groupspec)
    result = policy.get_action(no_agent_step)
    assert result == ActionInfo.empty()
Example #4
def test_action_masking_continuous():
    n_agents = 10
    shapes = [(3, ), (4, )]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 10)
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert masks is None
Example #5
def batched_step_result_from_proto(
    agent_info_list: Collection[AgentInfoProto],  # pylint: disable=unsubscriptable-object
    group_spec: AgentGroupSpec,
) -> BatchedStepResult:
    obs_list: List[np.ndarray] = []
    for obs_index, obs_shape in enumerate(group_spec.observation_shapes):
        is_visual = len(obs_shape) == 3
        if is_visual:
            obs_shape = cast(Tuple[int, int, int], obs_shape)
            obs_list += [
                _process_visual_observation(obs_index, obs_shape,
                                            agent_info_list)
            ]
        else:
            obs_list += [
                _process_vector_observation(obs_index, obs_shape,
                                            agent_info_list)
            ]
    rewards = np.array([agent_info.reward for agent_info in agent_info_list],
                       dtype=np.float32)

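    # A single dot product of the rewards with themselves is a cheap sentinel:
    # it is NaN if any reward is NaN, and non-finite if any reward is +/-Inf.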
    d = np.dot(rewards, rewards)
    has_nan = np.isnan(d)
    has_inf = not np.isfinite(d)
    # If we have any NaN or Inf, use np.nan_to_num to replace them with finite values
    if has_nan or has_inf:
        rewards = np.nan_to_num(rewards)
    if has_nan:
        logger.warning("An agent had a NaN reward in the environment")

    done = np.array([agent_info.done for agent_info in agent_info_list],
                    dtype=np.bool)
    max_step = np.array(
        [agent_info.max_step_reached for agent_info in agent_info_list],
        dtype=np.bool)
    agent_id = np.array([agent_info.id for agent_info in agent_info_list],
                        dtype=np.int32)
    action_mask = None
    if group_spec.is_action_discrete():
        if any(agent_info.action_mask is not None
               for agent_info in agent_info_list):
            n_agents = len(agent_info_list)
            a_size = np.sum(group_spec.discrete_action_branches)
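            # mask_matrix uses True for an *available* action; the proto mask
            # uses True for a forbidden one, so each agent's row is inverted
            # below, then flipped back so action_mask marks forbidden actions.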
            mask_matrix = np.ones((n_agents, a_size), dtype=np.bool)
            for agent_index, agent_info in enumerate(agent_info_list):
                if agent_info.action_mask is not None:
                    if len(agent_info.action_mask) == a_size:
                        mask_matrix[agent_index, :] = [
                            False if agent_info.action_mask[k] else True
                            for k in range(a_size)
                        ]
            action_mask = (1 - mask_matrix).astype(np.bool)
            indices = _generate_split_indices(
                group_spec.discrete_action_branches)
            action_mask = np.split(action_mask, indices, axis=1)
    return BatchedStepResult(obs_list, rewards, done, max_step, agent_id,
                             action_mask)
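The helper _generate_split_indices is referenced above but not part of this
listing. A minimal sketch consistent with the np.split call (the split points
are the cumulative branch sizes, excluding the final total) could be:

def _generate_split_indices(dims):
    # Hypothetical reconstruction: np.split wants the boundaries between
    # branches, i.e. running totals of all but the last branch size.
    # E.g. dims (3, 2, 4) -> (3, 5), yielding arrays of width 3, 2 and 4.
    return tuple(np.cumsum(dims)[:-1])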
Example #6
def test_action_masking_discrete_1():
    n_agents = 10
    shapes = [(3, ), (4, )]
    group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (10, ))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 1
    assert masks[0].shape == (n_agents, 10)
    assert masks[0][0, 0]
Example #7
def batched_step_result_from_proto(
    agent_info_list: Collection[AgentInfoProto],  # pylint: disable=unsubscriptable-object
    envStat: EnvironmentStatisticsProto,
    group_spec: AgentGroupSpec,
) -> BatchedStepResult:
    obs_list: List[np.ndarray] = []
    for obs_index, obs_shape in enumerate(group_spec.observation_shapes):
        is_visual = len(obs_shape) == 3
        if is_visual:
            obs_shape = cast(Tuple[int, int, int], obs_shape)
            obs_list.append(
                _process_visual_observation(obs_index, obs_shape,
                                            agent_info_list))
        else:
            obs_list.append(
                _process_vector_observation(obs_index, obs_shape,
                                            agent_info_list))
    rewards = np.array([agent_info.reward for agent_info in agent_info_list],
                       dtype=np.float32)

    _raise_on_nan_and_inf(rewards, "rewards")

    done = np.array([agent_info.done for agent_info in agent_info_list],
                    dtype=np.bool)
    max_step = np.array(
        [agent_info.max_step_reached for agent_info in agent_info_list],
        dtype=np.bool)
    agent_id = np.array([agent_info.id for agent_info in agent_info_list],
                        dtype=np.int32)
    action_mask = None
    if group_spec.is_action_discrete():
        if any(agent_info.action_mask is not None
               for agent_info in agent_info_list):
            n_agents = len(agent_info_list)
            a_size = np.sum(group_spec.discrete_action_branches)
            mask_matrix = np.ones((n_agents, a_size), dtype=np.bool)
            for agent_index, agent_info in enumerate(agent_info_list):
                if agent_info.action_mask is not None:
                    if len(agent_info.action_mask) == a_size:
                        mask_matrix[agent_index, :] = [
                            False if agent_info.action_mask[k] else True
                            for k in range(a_size)
                        ]
            action_mask = (1 - mask_matrix).astype(np.bool)
            indices = _generate_split_indices(
                group_spec.discrete_action_branches)
            action_mask = np.split(action_mask, indices, axis=1)
    # Convert the protobuf map fields to plain dicts; this must happen at
    # function scope so the names exist even when no action mask is built.
    double_stat = dict(
        (key, envStat.double_stat[key]) for key in envStat.double_stat)
    string_stat = dict(
        (key, envStat.string_stat[key]) for key in envStat.string_stat)
    return BatchedStepResult(obs_list, rewards, done, max_step, agent_id,
                             action_mask, double_stat, string_stat)
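_raise_on_nan_and_inf is likewise not shown here. A sketch, assuming it mirrors
the dot-product check from Example #5 but raises (as the test in Example #1
expects) instead of patching the values:

def _raise_on_nan_and_inf(data: np.ndarray, source: str) -> None:
    # Hypothetical reconstruction: a dot product of the array with itself is
    # NaN if any entry is NaN, and non-finite if any entry is +/-Inf.
    if data.size == 0:
        return
    d = np.dot(data, data)
    if np.isnan(d):
        raise RuntimeError(f"The {source} provided had NaN values.")
    if not np.isfinite(d):
        raise RuntimeError(f"The {source} provided had Infinite values.")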
Example #8
    def __init__(self, use_discrete):
        super().__init__()
        self.discrete = use_discrete
        action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
        self.group_spec = AgentGroupSpec([(OBS_SIZE, )], action_type,
                                         (2, ) if use_discrete else 1)
        # state
        self.position = 0.0
        self.step_count = 0
        self.random = random.Random(str(self.group_spec))
        self.goal = self.random.choice([-1, 1])
        self.action = None
        self.step_result = None
Example #9
def test_batched_step_result_from_proto():
    n_agents = 10
    shapes = [(3, ), (4, )]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    assert list(result.reward) == list(range(n_agents))
    assert list(result.agent_id) == list(range(n_agents))
    for index in range(n_agents):
        assert result.done[index] == (index % 2 == 0)
        assert result.max_step[index] == (index % 2 == 1)
    assert list(result.obs[0].shape) == [n_agents] + list(shapes[0])
    assert list(result.obs[1].shape) == [n_agents] + list(shapes[1])
Example #10
    def __init__(
        self,
        brain_names,
        use_discrete,
        step_size=STEP_SIZE,
        num_visual=0,
        num_vector=1,
        vis_obs_size=VIS_OBS_SIZE,
        vec_obs_size=OBS_SIZE,
        action_size=1,
    ):
        super().__init__()
        self.discrete = use_discrete
        self.num_visual = num_visual
        self.num_vector = num_vector
        self.vis_obs_size = vis_obs_size
        self.vec_obs_size = vec_obs_size
        action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
        self.group_spec = AgentGroupSpec(
            self._make_obs_spec(),
            action_type,
            tuple(
                2
                for _ in range(action_size)) if use_discrete else action_size,
        )
        self.action_size = action_size
        self.names = brain_names
        self.positions: Dict[str, List[float]] = {}
        self.step_count: Dict[str, float] = {}
        self.random = random.Random(str(self.group_spec))
        self.goal: Dict[str, int] = {}
        self.action = {}
        self.rewards: Dict[str, float] = {}
        self.final_rewards: Dict[str, List[float]] = {}
        self.step_result: Dict[str, BatchedStepResult] = {}
        self.agent_id: Dict[str, int] = {}
        self.step_size = step_size  # defines the difficulty of the test

        for name in self.names:
            self.agent_id[name] = 0
            self.goal[name] = self.random.choice([-1, 1])
            self.rewards[name] = 0
            self.final_rewards[name] = []
            self._reset_agent(name)
            self.action[name] = None
            self.step_result[name] = None
Example #11
def step_result_to_brain_info(
    step_result: BatchedStepResult,
    group_spec: AgentGroupSpec,
    agent_id_prefix: Optional[int] = None,
) -> BrainInfo:
    n_agents = step_result.n_agents()
    vis_obs_indices = []
    vec_obs_indices = []
    for index, observation in enumerate(step_result.obs):
        if len(observation.shape) == 2:
            vec_obs_indices.append(index)
        elif len(observation.shape) == 4:
            vis_obs_indices.append(index)
        else:
            raise UnityEnvironmentException(
                "Invalid input received from the environment, the observation should "
                "either be a vector of float or a PNG image")
    if len(vec_obs_indices) == 0:
        vec_obs = np.zeros((n_agents, 0), dtype=np.float32)
    else:
        vec_obs = np.concatenate([step_result.obs[i] for i in vec_obs_indices],
                                 axis=1)
    vis_obs = [step_result.obs[i] for i in vis_obs_indices]
    mask = np.ones((n_agents, np.sum(group_spec.action_size)),
                   dtype=np.float32)
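    # BrainInfo masks use 1.0 for an allowed action, while BatchedStepResult's
    # action_mask uses True for a forbidden one; 1 - concatenate converts
    # between the two conventions.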
    if group_spec.is_action_discrete():
        mask = np.ones((n_agents, np.sum(group_spec.discrete_action_branches)),
                       dtype=np.float32)
        if step_result.action_mask is not None:
            mask = 1 - np.concatenate(step_result.action_mask, axis=1)
    if agent_id_prefix is None:
        agent_ids = [str(ag_id) for ag_id in list(step_result.agent_id)]
    else:
        agent_ids = [
            f"${agent_id_prefix}-{ag_id}" for ag_id in step_result.agent_id
        ]
    return BrainInfo(
        vis_obs,
        vec_obs,
        list(step_result.reward),
        agent_ids,
        list(step_result.done),
        list(step_result.max_step),
        mask,
    )
Example #12
def agent_group_spec_from_proto(brain_param_proto: BrainParametersProto,
                                agent_info: AgentInfoProto) -> AgentGroupSpec:
    """
    Converts brain parameter and agent info proto to AgentGroupSpec object.
    :param brain_param_proto: protobuf object.
    :param agent_info: protobuf object.
    :return: AgentGroupSpec object.
    """
    observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
    action_type = (ActionType.DISCRETE
                   if brain_param_proto.vector_action_space_type == 0 else
                   ActionType.CONTINUOUS)
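    # A continuous space is described by a single int (the action vector size),
    # a discrete one by a tuple of branch sizes.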
    if action_type == ActionType.CONTINUOUS:
        action_shape: Union[int, Tuple[
            int, ...]] = brain_param_proto.vector_action_size[0]
    else:
        action_shape = tuple(brain_param_proto.vector_action_size)
    return AgentGroupSpec(observation_shape, action_type, action_shape)
Example #13
def group_spec_to_brain_parameters(
        name: str, group_spec: AgentGroupSpec) -> BrainParameters:
    vec_size = np.sum([
        shape[0] for shape in group_spec.observation_shapes if len(shape) == 1
    ])
    vis_sizes = [
        shape for shape in group_spec.observation_shapes if len(shape) == 3
    ]
    cam_res = [CameraResolution(s[0], s[1], s[2]) for s in vis_sizes]
    a_size: List[int] = []
    if group_spec.is_action_discrete():
        a_size += list(group_spec.discrete_action_branches)
        vector_action_space_type = 0
    else:
        a_size += [group_spec.action_size]
        vector_action_space_type = 1
    return BrainParameters(name, int(vec_size), cam_res, a_size, [],
                           vector_action_space_type)
Example #14
    def __init__(self, brain_names, use_discrete):
        super().__init__()
        self.discrete = use_discrete
        action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
        self.group_spec = AgentGroupSpec([(OBS_SIZE, )], action_type,
                                         (2, ) if use_discrete else 1)
        self.names = brain_names
        self.position: Dict[str, float] = {}
        self.step_count: Dict[str, float] = {}
        self.random = random.Random(str(self.group_spec))
        self.goal: Dict[str, int] = {}
        self.action = {}
        self.rewards: Dict[str, float] = {}
        self.final_rewards: Dict[str, List[float]] = {}
        self.step_result: Dict[str, BatchedStepResult] = {}

        for name in self.names:
            self.goal[name] = self.random.choice([-1, 1])
            self.rewards[name] = 0
            self.final_rewards[name] = []
            self._reset_agent(name)
            self.action[name] = None
            self.step_result[name] = None