def test_batched_step_result_from_proto_raises_on_nan():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
    ap_list = generate_list_agent_proto(n_agents, shapes, nan_observations=True)
    with pytest.raises(RuntimeError):
        batched_step_result_from_proto(ap_list, group_spec)
def create_mock_group_spec(
    number_visual_observations=0,
    vector_action_space_type="continuous",
    vector_observation_space_size=3,
    vector_action_space_size=None,
):
    """
    Creates a mock AgentGroupSpec object with the given parameters.
    """
    # Avoid using a mutable object as a default param
    act_type = ActionType.DISCRETE
    if vector_action_space_type == "continuous":
        act_type = ActionType.CONTINUOUS
        if vector_action_space_size is None:
            vector_action_space_size = 2
        else:
            vector_action_space_size = vector_action_space_size[0]
    else:
        if vector_action_space_size is None:
            vector_action_space_size = (2,)
        else:
            vector_action_space_size = tuple(vector_action_space_size)
    obs_shapes = [(vector_observation_space_size,)]
    for _ in range(number_visual_observations):
        obs_shapes += [(8, 8, 3)]
    return AgentGroupSpec(obs_shapes, act_type, vector_action_space_size)
def test_take_action_returns_empty_with_no_agents():
    test_seed = 3
    policy = FakePolicy(test_seed, basic_mock_brain(), basic_params())
    # Doesn't really matter what this is
    dummy_groupspec = AgentGroupSpec([(1,)], "continuous", 1)
    no_agent_step = BatchedStepResult.empty(dummy_groupspec)
    result = policy.get_action(no_agent_step)
    assert result == ActionInfo.empty()
def test_action_masking_continuous():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 10)
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert masks is None
def batched_step_result_from_proto(
    agent_info_list: Collection[AgentInfoProto],  # pylint: disable=unsubscriptable-object
    group_spec: AgentGroupSpec,
) -> BatchedStepResult:
    obs_list: List[np.ndarray] = []
    for obs_index, obs_shape in enumerate(group_spec.observation_shapes):
        is_visual = len(obs_shape) == 3
        if is_visual:
            obs_shape = cast(Tuple[int, int, int], obs_shape)
            obs_list += [
                _process_visual_observation(obs_index, obs_shape, agent_info_list)
            ]
        else:
            obs_list += [
                _process_vector_observation(obs_index, obs_shape, agent_info_list)
            ]
    rewards = np.array(
        [agent_info.reward for agent_info in agent_info_list], dtype=np.float32
    )
    # A single dot product of the rewards with themselves turns any NaN or Inf
    # in the vector into a NaN/Inf scalar, so one reduction detects both.
    d = np.dot(rewards, rewards)
    has_nan = np.isnan(d)
    has_inf = not np.isfinite(d)
    # If we have any NaNs or Infs, use np.nan_to_num to replace them with finite values
    if has_nan or has_inf:
        rewards = np.nan_to_num(rewards)
    if has_nan:
        logger.warning("An agent had a NaN reward in the environment")
    done = np.array([agent_info.done for agent_info in agent_info_list], dtype=np.bool)
    max_step = np.array(
        [agent_info.max_step_reached for agent_info in agent_info_list], dtype=np.bool
    )
    agent_id = np.array(
        [agent_info.id for agent_info in agent_info_list], dtype=np.int32
    )
    action_mask = None
    if group_spec.is_action_discrete():
        if any(agent_info.action_mask is not None for agent_info in agent_info_list):
            n_agents = len(agent_info_list)
            a_size = np.sum(group_spec.discrete_action_branches)
            mask_matrix = np.ones((n_agents, a_size), dtype=np.bool)
            for agent_index, agent_info in enumerate(agent_info_list):
                if agent_info.action_mask is not None:
                    if len(agent_info.action_mask) == a_size:
                        mask_matrix[agent_index, :] = [
                            False if agent_info.action_mask[k] else True
                            for k in range(a_size)
                        ]
            action_mask = (1 - mask_matrix).astype(np.bool)
            indices = _generate_split_indices(group_spec.discrete_action_branches)
            action_mask = np.split(action_mask, indices, axis=1)
    return BatchedStepResult(obs_list, rewards, done, max_step, agent_id, action_mask)
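# batched_step_result_from_proto above splits the concatenated mask with
# _generate_split_indices, which is not part of this excerpt. A minimal sketch
# of what that helper presumably computes, assuming np.split semantics (cut
# points are cumulative offsets, one per branch boundary); this is an
# illustrative guess, not necessarily the actual implementation.
def _generate_split_indices(dims):
    # For branches (3, 2, 4) this returns (3, 5), so np.split yields three
    # sub-arrays of widths 3, 2 and 4. A single branch needs no cut points,
    # and np.split then returns a one-element list, matching
    # test_action_masking_discrete_1 below.
    if len(dims) <= 1:
        return ()
    result = (dims[0],)
    for i in range(len(dims) - 2):
        result += (dims[i + 1] + result[i],)
    return result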
def test_action_masking_discrete_1():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.DISCRETE, (10,))
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    masks = result.action_mask
    assert isinstance(masks, list)
    assert len(masks) == 1
    assert masks[0].shape == (n_agents, 10)
    assert masks[0][0, 0]
def batched_step_result_from_proto(
    agent_info_list: Collection[AgentInfoProto],  # pylint: disable=unsubscriptable-object
    envStat: EnvironmentStatisticsProto,
    group_spec: AgentGroupSpec,
) -> BatchedStepResult:
    obs_list: List[np.ndarray] = []
    for obs_index, obs_shape in enumerate(group_spec.observation_shapes):
        is_visual = len(obs_shape) == 3
        if is_visual:
            obs_shape = cast(Tuple[int, int, int], obs_shape)
            obs_list.append(
                _process_visual_observation(obs_index, obs_shape, agent_info_list)
            )
        else:
            obs_list.append(
                _process_vector_observation(obs_index, obs_shape, agent_info_list)
            )
    rewards = np.array(
        [agent_info.reward for agent_info in agent_info_list], dtype=np.float32
    )
    _raise_on_nan_and_inf(rewards, "rewards")
    done = np.array([agent_info.done for agent_info in agent_info_list], dtype=np.bool)
    max_step = np.array(
        [agent_info.max_step_reached for agent_info in agent_info_list], dtype=np.bool
    )
    agent_id = np.array(
        [agent_info.id for agent_info in agent_info_list], dtype=np.int32
    )
    action_mask = None
    if group_spec.is_action_discrete():
        if any(agent_info.action_mask is not None for agent_info in agent_info_list):
            n_agents = len(agent_info_list)
            a_size = np.sum(group_spec.discrete_action_branches)
            mask_matrix = np.ones((n_agents, a_size), dtype=np.bool)
            for agent_index, agent_info in enumerate(agent_info_list):
                if agent_info.action_mask is not None:
                    if len(agent_info.action_mask) == a_size:
                        mask_matrix[agent_index, :] = [
                            False if agent_info.action_mask[k] else True
                            for k in range(a_size)
                        ]
            action_mask = (1 - mask_matrix).astype(np.bool)
            indices = _generate_split_indices(group_spec.discrete_action_branches)
            action_mask = np.split(action_mask, indices, axis=1)
    # Convert the protobuf statistics maps to plain dicts
    double_stat = {key: envStat.double_stat[key] for key in envStat.double_stat}
    string_stat = {key: envStat.string_stat[key] for key in envStat.string_stat}
    return BatchedStepResult(
        obs_list, rewards, done, max_step, agent_id, action_mask, double_stat, string_stat
    )
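# The variant above delegates reward validation to _raise_on_nan_and_inf,
# which is also not shown in this excerpt. A minimal sketch, assuming it is
# meant to raise the RuntimeError that
# test_batched_step_result_from_proto_raises_on_nan expects, rather than
# silently patching bad values the way the earlier variant does:
def _raise_on_nan_and_inf(data: np.ndarray, source: str) -> np.ndarray:
    # A single mean propagates any NaN or Inf in the input, so one reduction
    # is enough to detect both. Skip empty arrays, whose mean is NaN.
    if data.size == 0:
        return data
    d = np.mean(data)
    if np.isnan(d):
        raise RuntimeError(f"The {source} provided had NaN values.")
    if not np.isfinite(d):
        raise RuntimeError(f"The {source} provided had Infinite values.")
    return data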
def __init__(self, use_discrete):
    super().__init__()
    self.discrete = use_discrete
    action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
    self.group_spec = AgentGroupSpec(
        [(OBS_SIZE,)], action_type, (2,) if use_discrete else 1
    )
    # state
    self.position = 0.0
    self.step_count = 0
    self.random = random.Random(str(self.group_spec))
    self.goal = self.random.choice([-1, 1])
    self.action = None
    self.step_result = None
def test_batched_step_result_from_proto():
    n_agents = 10
    shapes = [(3,), (4,)]
    group_spec = AgentGroupSpec(shapes, ActionType.CONTINUOUS, 3)
    ap_list = generate_list_agent_proto(n_agents, shapes)
    result = batched_step_result_from_proto(ap_list, group_spec)
    assert list(result.reward) == list(range(n_agents))
    assert list(result.agent_id) == list(range(n_agents))
    for index in range(n_agents):
        assert result.done[index] == (index % 2 == 0)
        assert result.max_step[index] == (index % 2 == 1)
    assert list(result.obs[0].shape) == [n_agents] + list(shapes[0])
    assert list(result.obs[1].shape) == [n_agents] + list(shapes[1])
def __init__(
    self,
    brain_names,
    use_discrete,
    step_size=STEP_SIZE,
    num_visual=0,
    num_vector=1,
    vis_obs_size=VIS_OBS_SIZE,
    vec_obs_size=OBS_SIZE,
    action_size=1,
):
    super().__init__()
    self.discrete = use_discrete
    self.num_visual = num_visual
    self.num_vector = num_vector
    self.vis_obs_size = vis_obs_size
    self.vec_obs_size = vec_obs_size
    action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
    self.group_spec = AgentGroupSpec(
        self._make_obs_spec(),
        action_type,
        tuple(2 for _ in range(action_size)) if use_discrete else action_size,
    )
    self.action_size = action_size
    self.names = brain_names
    self.positions: Dict[str, List[float]] = {}
    self.step_count: Dict[str, float] = {}
    self.random = random.Random(str(self.group_spec))
    self.goal: Dict[str, int] = {}
    self.action = {}
    self.rewards: Dict[str, float] = {}
    self.final_rewards: Dict[str, List[float]] = {}
    self.step_result: Dict[str, BatchedStepResult] = {}
    self.agent_id: Dict[str, int] = {}
    self.step_size = step_size  # defines the difficulty of the test
    for name in self.names:
        self.agent_id[name] = 0
        self.goal[name] = self.random.choice([-1, 1])
        self.rewards[name] = 0
        self.final_rewards[name] = []
        self._reset_agent(name)
        self.action[name] = None
        self.step_result[name] = None
def step_result_to_brain_info(
    step_result: BatchedStepResult,
    group_spec: AgentGroupSpec,
    agent_id_prefix: int = None,
) -> BrainInfo:
    n_agents = step_result.n_agents()
    vis_obs_indices = []
    vec_obs_indices = []
    for index, observation in enumerate(step_result.obs):
        if len(observation.shape) == 2:
            vec_obs_indices.append(index)
        elif len(observation.shape) == 4:
            vis_obs_indices.append(index)
        else:
            raise UnityEnvironmentException(
                "Invalid input received from the environment, the observation should "
                "either be a vector of float or a PNG image"
            )
    if len(vec_obs_indices) == 0:
        vec_obs = np.zeros((n_agents, 0), dtype=np.float32)
    else:
        vec_obs = np.concatenate(
            [step_result.obs[i] for i in vec_obs_indices], axis=1
        )
    vis_obs = [step_result.obs[i] for i in vis_obs_indices]
    mask = np.ones((n_agents, np.sum(group_spec.action_size)), dtype=np.float32)
    if group_spec.is_action_discrete():
        mask = np.ones(
            (n_agents, np.sum(group_spec.discrete_action_branches)), dtype=np.float32
        )
        if step_result.action_mask is not None:
            # BatchedStepResult masks flag forbidden actions, while BrainInfo
            # expects 1 for allowed actions, hence the inversion.
            mask = 1 - np.concatenate(step_result.action_mask, axis=1)
    if agent_id_prefix is None:
        agent_ids = [str(ag_id) for ag_id in list(step_result.agent_id)]
    else:
        agent_ids = [f"${agent_id_prefix}-{ag_id}" for ag_id in step_result.agent_id]
    return BrainInfo(
        vis_obs,
        vec_obs,
        list(step_result.reward),
        agent_ids,
        list(step_result.done),
        list(step_result.max_step),
        mask,
    )
def agent_group_spec_from_proto(
    brain_param_proto: BrainParametersProto, agent_info: AgentInfoProto
) -> AgentGroupSpec:
    """
    Converts brain parameter and agent info protos to an AgentGroupSpec object.
    :param brain_param_proto: protobuf object.
    :param agent_info: protobuf object.
    :return: AgentGroupSpec object.
    """
    observation_shape = [tuple(obs.shape) for obs in agent_info.observations]
    action_type = (
        ActionType.DISCRETE
        if brain_param_proto.vector_action_space_type == 0
        else ActionType.CONTINUOUS
    )
    if action_type == ActionType.CONTINUOUS:
        action_shape: Union[int, Tuple[int, ...]] = brain_param_proto.vector_action_size[0]
    else:
        action_shape = tuple(brain_param_proto.vector_action_size)
    return AgentGroupSpec(observation_shape, action_type, action_shape)
def group_spec_to_brain_parameters(
    name: str, group_spec: AgentGroupSpec
) -> BrainParameters:
    vec_size = np.sum(
        [shape[0] for shape in group_spec.observation_shapes if len(shape) == 1]
    )
    vis_sizes = [
        shape for shape in group_spec.observation_shapes if len(shape) == 3
    ]
    cam_res = [CameraResolution(s[0], s[1], s[2]) for s in vis_sizes]
    a_size: List[int] = []
    if group_spec.is_action_discrete():
        a_size += list(group_spec.discrete_action_branches)
        vector_action_space_type = 0
    else:
        a_size += [group_spec.action_size]
        vector_action_space_type = 1
    return BrainParameters(
        name, int(vec_size), cam_res, a_size, [], vector_action_space_type
    )
def __init__(self, brain_names, use_discrete):
    super().__init__()
    self.discrete = use_discrete
    action_type = ActionType.DISCRETE if use_discrete else ActionType.CONTINUOUS
    self.group_spec = AgentGroupSpec(
        [(OBS_SIZE,)], action_type, (2,) if use_discrete else 1
    )
    self.names = brain_names
    self.position: Dict[str, float] = {}
    self.step_count: Dict[str, float] = {}
    self.random = random.Random(str(self.group_spec))
    self.goal: Dict[str, int] = {}
    self.action = {}
    self.rewards: Dict[str, float] = {}
    self.final_rewards: Dict[str, List[float]] = {}
    self.step_result: Dict[str, BatchedStepResult] = {}
    for name in self.names:
        self.goal[name] = self.random.choice([-1, 1])
        self.rewards[name] = 0
        self.final_rewards[name] = []
        self._reset_agent(name)
        self.action[name] = None
        self.step_result[name] = None