def observation_spec(self) -> types.Observation:
    """Build the observation spec for every possible agent.

    All agents share ``self.observation_space``: its ``"observation"`` entry
    becomes the observation spec and its ``"action_mask"`` entry becomes the
    legal-actions spec.

    Returns:
        A dict keyed by each entry of ``self._possible_agents`` whose values
        are ``types.OLT`` records (observation, legal_actions, terminal). The
        terminal spec is a float32 array of shape ``(1,)``.
    """
    agent_specs = {}
    for agent in self._possible_agents:
        obs_spec = _convert_to_spec(self.observation_space["observation"])
        mask_spec = _convert_to_spec(self.observation_space["action_mask"])
        agent_specs[agent] = types.OLT(
            observation=obs_spec,
            legal_actions=mask_spec,
            terminal=specs.Array((1, ), np.float32),
        )
    return agent_specs
def observation_spec(self) -> types.Observation:
    """Build the observation spec for every possible agent.

    For each agent in ``self.possible_agents`` the wrapped environment's
    observation space is converted into the observation spec and its action
    space into the legal-actions spec.

    Returns:
        A dict mapping each agent to a ``types.OLT`` record (observation,
        legal_actions, terminal). The terminal spec is a float32 array of
        shape ``(1,)``.
    """
    return {
        agent: types.OLT(
            observation=_convert_to_spec(
                self._environment.observation_spaces[agent]),
            legal_actions=_convert_to_spec(
                self._environment.action_spaces[agent]),
            terminal=specs.Array((1, ), np.float32),
        )
        for agent in self.possible_agents
    }
def observation_spec(self) -> Dict[str, OLT]:
    """Build the observation spec for every agent id of the environment.

    Agents are taken from ``self._environment.agent_ids``. Each agent's
    observation space becomes the observation spec and its action space
    becomes the legal-actions spec.

    Returns:
        A dict mapping each agent id to an ``OLT`` record (observation,
        legal_actions, terminal). The terminal spec is a float32 array of
        shape ``(1,)``.
    """
    return {
        agent: OLT(
            observation=_convert_to_spec(
                self._environment.observation_spaces[agent]
            ),
            legal_actions=_convert_to_spec(
                self._environment.action_spaces[agent]
            ),
            terminal=specs.Array((1,), np.float32),
        )
        for agent in self._environment.agent_ids
    }
def observation_spec(self) -> Dict[str, OLT]:
    """Build the observation spec for every agent in ``self.agents``.

    When ``self._include_agent_info`` is truthy the observation spec is a
    2-tuple of (converted observation space, ``agent_info_spec()``);
    otherwise it is just the converted observation space.

    Returns:
        A dict mapping each agent to an ``OLT`` record (observation,
        legal_actions, terminal). The legal-actions spec is derived from the
        agent's action space and the terminal spec is a float32 array of
        shape ``(1,)``.
    """
    specs_by_agent: Dict[str, OLT] = {}
    for agent in self.agents:
        if self._include_agent_info:
            # Pair the plain observation spec with the extra agent-info spec.
            obs_spec = (
                _convert_to_spec(self.observation_spaces[agent]),
                agent_info_spec(),
            )
        else:
            obs_spec = _convert_to_spec(self.observation_spaces[agent])

        legal_spec = _convert_to_spec(self.action_spaces[agent])
        specs_by_agent[agent] = OLT(
            observation=obs_spec,
            legal_actions=legal_spec,
            terminal=specs.Array((1, ), np.float32),
        )
    return specs_by_agent
def action_spec(
    self,
) -> Dict[str, Union[specs.DiscreteArray, specs.BoundedArray]]:
    """Build the action spec for every possible agent.

    Returns:
        A dict mapping each agent in ``self.possible_agents`` to the result
        of ``_convert_to_spec`` applied to that agent's action space.
    """
    # Read the spaces once, before iterating over the agents.
    spaces = self.action_spaces
    return {
        agent: _convert_to_spec(spaces[agent])
        for agent in self.possible_agents
    }
def _convert_observation(  # type: ignore[override]
    self, agent: str, observe: Union[dict, np.ndarray], done: bool
) -> types.OLT:
    """Wrap one agent's raw observation in a ``types.OLT`` record.

    Args:
        agent: Key used to look up the agent's action space when no action
            mask is supplied.
        observe: Either a dict containing ``"action_mask"`` and
            ``"observation"`` entries, or the observation itself.
        done: Terminal flag, stored as a one-element float32 array.

    Returns:
        A ``types.OLT`` holding the observation, the legal-action mask and
        the terminal flag.
    """
    has_mask = isinstance(observe, dict) and "action_mask" in observe
    if has_mask:
        legals, observation = observe["action_mask"], observe["observation"]
    else:
        # No mask supplied: mark every action as legal.
        action_space = self._environment.action_spaces[agent]
        legals = np.ones(
            _convert_to_spec(action_space).shape,
            dtype=action_space.dtype,
        )
        observation = observe

    # int8 data is widened: observations to float32, masks to int64.
    if observation.dtype == np.int8:
        observation = np.float32(observation)
    if legals.dtype == np.int8:
        legals = np.int64(legals)

    return types.OLT(
        observation=observation,
        legal_actions=legals,
        terminal=np.asarray([done], dtype=np.float32),
    )
def _convert_observations(
    self, observes: Dict[str, np.ndarray], dones: Dict[str, bool]
) -> Dict[str, OLT]:
    """Wrap every agent's raw observation in an ``OLT`` record.

    Args:
        observes: Per-agent observations. A value may be a dict containing
            ``"action_mask"`` and ``"observation"`` entries, or the
            observation itself.
        dones: Per-agent terminal flags, indexed with the keys of
            ``observes``.

    Returns:
        A dict mapping each agent to an ``OLT`` whose observation is a
        float32 array, whose legal actions are the supplied mask (or all
        ones) and whose terminal is a one-element float32 array.
    """
    converted: Dict[str, OLT] = {}
    for agent, raw in observes.items():
        masked = isinstance(raw, dict) and "action_mask" in raw
        if masked:
            legals = raw["action_mask"]
            raw = raw["observation"]
        else:
            # TODO Handle legal actions better for continuous envs,
            # maybe have min and max for each action and clip the agents
            # actions accordingly
            action_space = self._environment.action_spaces[agent]
            legals = np.ones(
                _convert_to_spec(action_space).shape,
                dtype=action_space.dtype,
            )

        converted[agent] = OLT(
            observation=np.array(raw, dtype=np.float32),
            legal_actions=legals,
            terminal=np.asarray([dones[agent]], dtype=np.float32),
        )
    return converted
def test_multi_discrete(self):
    """Check the spec converted from a ``MultiDiscrete([2, 3])`` space.

    In-range samples must validate; samples with a component equal to its
    dimension's size must raise ``ValueError``.
    """
    converted = gym_wrapper._convert_to_spec(gym.spaces.MultiDiscrete([2, 3]))

    # Lowest and highest in-range samples are accepted.
    for sample in ([0, 0], [1, 2]):
        converted.validate(sample)

    # One component out of range in each dimension is rejected.
    for sample in ([2, 2], [1, 3]):
        with self.assertRaises(ValueError):
            converted.validate(sample)
def observation_spec(self) -> types.Observation:
    """Build the observation spec for every possible agent.

    If an agent's observation space is a ``gym.spaces.Dict``, its
    ``"observation"`` and ``"action_mask"`` entries supply the observation
    and legal-actions spaces. Otherwise the observation space itself and the
    agent's action space are used. The spaces are deep-copied before their
    dtype is adjusted, so the environment's own spaces are left untouched.

    Returns:
        A dict mapping each agent to a ``types.OLT`` record (observation,
        legal_actions, terminal). The terminal spec is a float32 array of
        shape ``(1,)``.
    """
    env = self._environment
    specs_by_agent = {}
    for agent in env.possible_agents:
        agent_space = env.observation_spaces[agent]
        if isinstance(agent_space, gym.spaces.Dict):
            obs_source = agent_space["observation"]
            legal_source = agent_space["action_mask"]
        else:
            obs_source = agent_space
            legal_source = env.action_spaces[agent]

        obs_space = copy.deepcopy(obs_source)
        legal_space = copy.deepcopy(legal_source)

        # int8 spaces are widened: observations to float32, masks to int64.
        if obs_space.dtype == np.int8:
            obs_space.dtype = np.dtype(np.float32)
        if legal_space.dtype == np.int8:
            legal_space.dtype = np.dtype(np.int64)

        specs_by_agent[agent] = types.OLT(
            observation=_convert_to_spec(obs_space),
            legal_actions=_convert_to_spec(legal_space),
            terminal=specs.Array((1, ), np.float32),
        )
    return specs_by_agent
def _convert_observations(
    self, observes: Dict[str, np.ndarray], dones: Dict[str, bool]
) -> types.Observation:
    """Wrap every agent's raw observation in a ``types.OLT`` record.

    Args:
        observes: Per-agent observations. A value may be a dict containing
            ``"action_mask"`` and ``"observation"`` entries, or the
            observation itself.
        dones: Per-agent terminal flags, indexed with the keys of
            ``observes``.

    Returns:
        A dict mapping each agent to a ``types.OLT``. When no mask is
        supplied the legal actions are all ones, shaped by the spec of the
        shared ``self.action_space``.
    """
    wrapped: Dict[str, types.OLT] = {}
    for agent, raw in observes.items():
        if isinstance(raw, dict) and "action_mask" in raw:
            legals = raw["action_mask"]
            raw = raw["observation"]
        else:
            # No mask supplied: mark every action as legal.
            mask_shape = _convert_to_spec(self.action_space).shape
            legals = np.ones(mask_shape, dtype=self.action_space.dtype)

        terminal = np.asarray([dones[agent]], dtype=np.float32)
        wrapped[agent] = types.OLT(
            observation=raw,
            legal_actions=legals,
            terminal=terminal,
        )
    return wrapped
def action_spec(self) -> Dict[str, specs.DiscreteArray]:
    """Build the action spec for every possible agent.

    Returns:
        A dict mapping each agent in ``self._possible_agents`` to the result
        of ``_convert_to_spec`` applied to the shared ``self.action_space``.
    """
    per_agent = {}
    for agent in self._possible_agents:
        per_agent[agent] = _convert_to_spec(self.action_space)
    return per_agent
def action_spec(self) -> Dict[str, specs.DiscreteArray]:
    """Build the action spec for every possible agent.

    Returns:
        A dict mapping each agent in ``self.possible_agents`` to the result
        of ``_convert_to_spec`` applied to that agent's action space in the
        wrapped environment.
    """
    return {
        agent: _convert_to_spec(self._environment.action_spaces[agent])
        for agent in self.possible_agents
    }