def test_batched_step_result_from_proto_raises_on_nan():
    """Batching agent protos generated with ``nan_observations=True`` must raise RuntimeError."""
    agent_count = 10
    obs_shapes = [(3,), (4,)]
    spec = AgentGroupSpec(obs_shapes, ActionType.CONTINUOUS, 3)
    nan_protos = generate_list_agent_proto(
        agent_count, obs_shapes, nan_observations=True
    )
    # The conversion itself is the operation under test, so it is the only
    # statement inside the raises-context.
    with pytest.raises(RuntimeError):
        batched_step_result_from_proto(nan_protos, spec)
def create_mock_group_spec(
    number_visual_observations=0,
    vector_action_space_type="continuous",
    vector_observation_space_size=3,
    vector_action_space_size=None,
):
    """
    Build an AgentGroupSpec for tests from a few simple parameters.

    :param number_visual_observations: number of (8, 8, 3) observation shapes
        appended after the single vector observation.
    :param vector_action_space_type: "continuous" selects
        ActionType.CONTINUOUS; any other value selects ActionType.DISCRETE.
    :param vector_observation_space_size: length of the vector observation.
    :param vector_action_space_size: optional sequence describing the action
        space. For continuous actions only its first element is used
        (default 2); for discrete actions it is converted to a tuple
        (default (2,)). None is the default so that no mutable object is
        shared between calls.
    :return: AgentGroupSpec built from the resulting shapes and action type.
    """
    size_not_given = vector_action_space_size is None
    if vector_action_space_type == "continuous":
        action_type = ActionType.CONTINUOUS
        action_shape = 2 if size_not_given else vector_action_space_size[0]
    else:
        action_type = ActionType.DISCRETE
        action_shape = (2,) if size_not_given else tuple(vector_action_space_size)

    # Vector observation first, followed by one fixed-size shape per visual
    # observation.
    visual_shapes = [(8, 8, 3) for _ in range(number_visual_observations)]
    observation_shapes = [(vector_observation_space_size,)] + visual_shapes
    return AgentGroupSpec(observation_shapes, action_type, action_shape)
def test_take_action_returns_empty_with_no_agents():
    """A policy asked to act on an empty step result returns ``ActionInfo.empty()``."""
    seed = 3
    fake_policy = FakePolicy(seed, basic_mock_brain(), basic_params())
    # The contents of this spec are irrelevant; it is only needed to build an
    # empty step result.
    placeholder_spec = AgentGroupSpec([(1,)], "continuous", 1)
    empty_step = BatchedStepResult.empty(placeholder_spec)
    assert fake_policy.get_action(empty_step) == ActionInfo.empty()
def test_action_masking_continuous():
    """A step result built with a continuous-action spec carries no action mask."""
    agent_count = 10
    obs_shapes = [(3,), (4,)]
    spec = AgentGroupSpec(obs_shapes, ActionType.CONTINUOUS, 10)
    protos = generate_list_agent_proto(agent_count, obs_shapes)
    step_result = batched_step_result_from_proto(protos, spec)
    assert step_result.action_mask is None
def test_action_masking_discrete_1():
    """A single discrete branch of size 10 yields one mask of shape (n_agents, 10)."""
    agent_count = 10
    obs_shapes = [(3,), (4,)]
    spec = AgentGroupSpec(obs_shapes, ActionType.DISCRETE, (10,))
    protos = generate_list_agent_proto(agent_count, obs_shapes)
    action_masks = batched_step_result_from_proto(protos, spec).action_mask

    assert isinstance(action_masks, list)
    assert len(action_masks) == 1
    branch_mask = action_masks[0]
    assert branch_mask.shape == (agent_count, 10)
    # The entry for the first agent / first action is expected to be truthy.
    assert branch_mask[0, 0]
def __init__(self, use_discrete):
    """
    Set up the group spec and the initial per-episode state.

    :param use_discrete: when truthy the spec uses ActionType.DISCRETE with
        a single branch of size 2; otherwise ActionType.CONTINUOUS with
        action size 1.
    """
    super().__init__()
    self.discrete = use_discrete
    if use_discrete:
        action_type = ActionType.DISCRETE
        action_shape = (2,)
    else:
        action_type = ActionType.CONTINUOUS
        action_shape = 1
    self.group_spec = AgentGroupSpec([(OBS_SIZE,)], action_type, action_shape)

    # state
    self.position = 0.0
    self.step_count = 0
    # The RNG is seeded from the spec's string form and must exist before
    # the goal is drawn from it.
    self.random = random.Random(str(self.group_spec))
    self.goal = self.random.choice([-1, 1])
    self.action = None
    self.step_result = None
def test_batched_step_result_from_proto():
    """Check rewards, ids, done/max_step flags and observation shapes of a batched result."""
    agent_count = 10
    obs_shapes = [(3,), (4,)]
    spec = AgentGroupSpec(obs_shapes, ActionType.CONTINUOUS, 3)
    protos = generate_list_agent_proto(agent_count, obs_shapes)
    step_result = batched_step_result_from_proto(protos, spec)

    # Both rewards and agent ids are expected to equal 0..n_agents-1.
    expected_sequence = list(range(agent_count))
    assert list(step_result.reward) == expected_sequence
    assert list(step_result.agent_id) == expected_sequence

    # Even-indexed agents are expected to be done; odd-indexed ones to have
    # hit max_step.
    for agent_idx in range(agent_count):
        is_even = agent_idx % 2 == 0
        assert step_result.done[agent_idx] == is_even
        assert step_result.max_step[agent_idx] == (not is_even)

    # Each observation is batched along a leading agent dimension.
    for obs_idx, shape in enumerate(obs_shapes):
        assert list(step_result.obs[obs_idx].shape) == [agent_count] + list(shape)
def __init__(
    self,
    brain_names,
    use_discrete,
    step_size=STEP_SIZE,
    num_visual=0,
    num_vector=1,
    vis_obs_size=VIS_OBS_SIZE,
    vec_obs_size=OBS_SIZE,
    action_size=1,
):
    """
    Set up the group spec and the per-brain bookkeeping dictionaries.

    :param brain_names: iterable of names; state is initialised for each one.
    :param use_discrete: when truthy the spec uses ActionType.DISCRETE with
        ``action_size`` branches of size 2; otherwise ActionType.CONTINUOUS
        with ``action_size`` passed through unchanged.
    :param step_size: stored on the instance; defines the difficulty of the
        test.
    :param num_visual: stored as ``self.num_visual`` (presumably read by
        ``_make_obs_spec`` -- confirm there).
    :param num_vector: stored as ``self.num_vector`` (presumably read by
        ``_make_obs_spec`` -- confirm there).
    :param vis_obs_size: stored as ``self.vis_obs_size``.
    :param vec_obs_size: stored as ``self.vec_obs_size``.
    :param action_size: number of discrete branches, or the continuous
        action size.
    """
    super().__init__()
    self.discrete = use_discrete
    self.num_visual = num_visual
    self.num_vector = num_vector
    self.vis_obs_size = vis_obs_size
    self.vec_obs_size = vec_obs_size

    if use_discrete:
        action_type = ActionType.DISCRETE
    else:
        action_type = ActionType.CONTINUOUS
    # The observation spec is built only after the observation settings
    # above have been stored on the instance.
    obs_spec = self._make_obs_spec()
    if use_discrete:
        action_shape = tuple(2 for _ in range(action_size))
    else:
        action_shape = action_size
    self.group_spec = AgentGroupSpec(obs_spec, action_type, action_shape)

    self.action_size = action_size
    self.names = brain_names
    self.positions: Dict[str, List[float]] = {}
    self.step_count: Dict[str, float] = {}
    # Seeded from the spec's string form; goals below are drawn from it.
    self.random = random.Random(str(self.group_spec))
    self.goal: Dict[str, int] = {}
    self.action = {}
    self.rewards: Dict[str, float] = {}
    self.final_rewards: Dict[str, List[float]] = {}
    self.step_result: Dict[str, BatchedStepResult] = {}
    self.agent_id: Dict[str, int] = {}
    self.step_size = step_size  # defines the difficulty of the test

    for brain_name in self.names:
        self.agent_id[brain_name] = 0
        self.goal[brain_name] = self.random.choice([-1, 1])
        self.rewards[brain_name] = 0
        self.final_rewards[brain_name] = []
        self._reset_agent(brain_name)
        # No action or step result exists until the environment is stepped.
        self.action[brain_name] = None
        self.step_result[brain_name] = None
def agent_group_spec_from_proto(
    brain_param_proto: BrainParametersProto, agent_info: AgentInfoProto
) -> AgentGroupSpec:
    """
    Build an AgentGroupSpec from a brain-parameters proto and an agent-info proto.

    :param brain_param_proto: protobuf object supplying the action space type
        and action size.
    :param agent_info: protobuf object whose observations supply the
        observation shapes.
    :return: AgentGroupSpec object.
    """
    obs_shapes = []
    for observation in agent_info.observations:
        obs_shapes.append(tuple(observation.shape))

    # A space type of 0 is treated as discrete; every other value as
    # continuous.
    if brain_param_proto.vector_action_space_type == 0:
        action_type = ActionType.DISCRETE
        # Discrete: keep every entry of the action size as a tuple.
        action_shape: Union[int, Tuple[int, ...]] = tuple(
            brain_param_proto.vector_action_size
        )
    else:
        action_type = ActionType.CONTINUOUS
        # Continuous: only the first entry of the action size is used.
        action_shape = brain_param_proto.vector_action_size[0]

    return AgentGroupSpec(obs_shapes, action_type, action_shape)
def __init__(self, brain_names, use_discrete):
    """
    Set up the group spec and the per-brain bookkeeping dictionaries.

    :param brain_names: iterable of names; state is initialised for each one.
    :param use_discrete: when truthy the spec uses ActionType.DISCRETE with
        a single branch of size 2; otherwise ActionType.CONTINUOUS with
        action size 1.
    """
    super().__init__()
    self.discrete = use_discrete
    if use_discrete:
        action_type = ActionType.DISCRETE
        action_shape = (2,)
    else:
        action_type = ActionType.CONTINUOUS
        action_shape = 1
    self.group_spec = AgentGroupSpec([(OBS_SIZE,)], action_type, action_shape)

    self.names = brain_names
    self.position: Dict[str, float] = {}
    self.step_count: Dict[str, float] = {}
    # Seeded from the spec's string form; goals below are drawn from it.
    self.random = random.Random(str(self.group_spec))
    self.goal: Dict[str, int] = {}
    self.action = {}
    self.rewards: Dict[str, float] = {}
    self.final_rewards: Dict[str, List[float]] = {}
    self.step_result: Dict[str, BatchedStepResult] = {}

    for brain_name in self.names:
        self.goal[brain_name] = self.random.choice([-1, 1])
        self.rewards[brain_name] = 0
        self.final_rewards[brain_name] = []
        self._reset_agent(brain_name)
        # No action or step result exists until the environment is stepped.
        self.action[brain_name] = None
        self.step_result[brain_name] = None