def _check_obs(
    obs: Union[tuple, dict, np.ndarray, int],
    observation_space: Space,
    method_name: str,
):
    """Validate a single observation against the space the environment declares.

    Every check is an ``assert``; a violation surfaces as an ``AssertionError``
    whose message names the offending method.

    Args:
        obs: The observation produced by the environment
        observation_space: The space the observation is expected to belong to
        method_name: Name of the method that produced ``obs`` (used in the messages only)
    """
    # A tuple observation is only acceptable when the declared space is a Tuple.
    expects_tuple = isinstance(observation_space, Tuple)
    assert expects_tuple or not isinstance(
        obs, tuple
    ), f"The observation returned by the `{method_name}()` method should be a single value, not a tuple"

    # Type check that depends on the kind of space.
    if isinstance(observation_space, Discrete):
        obs_is_int = isinstance(obs, int)
        assert (
            obs_is_int
        ), f"The observation returned by `{method_name}()` method must be an int"
    elif _is_numpy_array_space(observation_space):
        obs_is_array = isinstance(obs, np.ndarray)
        assert (
            obs_is_array
        ), f"The observation returned by `{method_name}()` method must be a numpy array"

    # Finally let the space itself decide whether the value belongs to it.
    assert observation_space.contains(
        obs
    ), f"The observation returned by the `{method_name}()` method does not match the given observation space"
def _check_returned_values(env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space) -> None:
    """
    Check the returned values by the env when calling `.reset()` or `.step()` methods.

    The environment is reset once and stepped once with a randomly sampled action.
    The observation of both calls is validated against ``observation_space`` and the
    reward, done flag and info of the step are type-checked.

    :param env: The environment to check
    :param observation_space: The observation space the observations are validated against
    :param action_space: The action space the random action is sampled from
    :raises AssertionError: If one of the returned values does not pass a check
    """

    def _check_observation(obs, method_name: str) -> None:
        # Shared by `reset()` and `step()`: dict observations are validated
        # entry by entry so that a failure can name the offending key.
        if not isinstance(observation_space, spaces.Dict):
            _check_obs(obs, observation_space, method_name)
            return

        assert isinstance(obs, dict), f"The observation returned by `{method_name}()` must be a dictionary"
        for key, sub_space in observation_space.spaces.items():
            try:
                _check_obs(obs[key], sub_space, method_name)
            except AssertionError as e:
                # Chain explicitly so the original failure stays attached as the cause.
                raise AssertionError(f"Error while checking key={key}: " + str(e)) from e

    # because env inherits from gym.Env, we assume that `reset()` and `step()` methods exists
    _check_observation(env.reset(), "reset")

    # Sample a random action
    action = action_space.sample()
    data = env.step(action)

    assert len(data) == 4, "The `step()` method must return four values: obs, reward, done, info"

    # Unpack
    obs, reward, done, info = data

    _check_observation(obs, "step")

    # We also allow int because the reward will be cast to float
    assert isinstance(reward, (float, int, np.float32)), "The reward returned by `step()` must be a float"
    assert isinstance(done, bool), "The `done` signal must be a boolean"
    assert isinstance(info, dict), "The `info` returned by `step()` must be a python dictionary"

    if isinstance(env, gym.GoalEnv):
        # For a GoalEnv, the keys are checked at reset
        assert reward == env.compute_reward(obs["achieved_goal"], obs["desired_goal"], info)
def _check_obs(obs: Union[tuple, dict, np.ndarray, int], observation_space: spaces.Space, method_name: str) -> None:
    """
    Verify that an observation handed back by the environment agrees with the
    observation space it declares.

    ``method_name`` is only used to build the assertion messages.
    """
    # Tuples are rejected unless the declared space is itself a Tuple space.
    tuple_space = isinstance(observation_space, spaces.Tuple)
    assert tuple_space or not isinstance(
        obs, tuple
    ), "The observation returned by the `{}()` method should be a single value, not a tuple".format(method_name)

    # The check for a GoalEnv is done by the base class
    if isinstance(observation_space, spaces.Discrete):
        has_int_type = isinstance(obs, int)
        assert has_int_type, "The observation returned by `{}()` method must be an int".format(method_name)
    elif _is_numpy_array_space(observation_space):
        has_array_type = isinstance(obs, np.ndarray)
        assert has_array_type, "The observation returned by `{}()` method must be a numpy array".format(method_name)

    # Membership test delegated to the space.
    assert observation_space.contains(
        obs
    ), "The observation returned by the `{}()` method does not match the given observation space".format(method_name)
def _check_obs(
    obs: Union[tuple, dict, np.ndarray, int],
    observation_space: spaces.Space,
    method_name: str,
) -> None:
    """
    Make sure the observation produced by ``method_name`` is compatible with
    the declared observation space; any mismatch fails an assertion.
    """
    # Non-tuple spaces must never yield a tuple.
    space_is_tuple = isinstance(observation_space, spaces.Tuple)
    assert space_is_tuple or not isinstance(
        obs, tuple
    ), f"The observation returned by the `{method_name}()` method should be a single value, not a tuple"

    if isinstance(observation_space, spaces.Discrete):
        # Any scalar accepted by numpy is allowed for a discrete space.
        scalar_obs = np.isscalar(obs)
        assert scalar_obs, f"The observation returned by `{method_name}()` method must be a scalar"
    elif _is_numpy_array_space(observation_space):
        array_obs = isinstance(obs, np.ndarray)
        assert array_obs, f"The observation returned by `{method_name}()` method must be a numpy array"

    # The space has the final word on membership.
    assert observation_space.contains(
        obs
    ), f"The observation returned by the `{method_name}()` method does not match the given observation space"
def _check_returned_values(env: gym.Env, observation_space: Space, action_space: Space):
    """Check the returned values by the env when calling :meth:`env.reset` or :meth:`env.step` methods.

    The environment is reset once and stepped once with a randomly sampled action.
    The observation of both calls is validated against ``observation_space`` and the
    reward, done flag and info of the step are type-checked.

    Args:
        env: The environment
        observation_space: The environment's observation space
        action_space: The environment's action space

    Raises:
        AssertionError: If one of the returned values does not pass a check
    """

    def _check_observation(obs, method_name: str):
        # Shared by `reset()` and `step()`: dict observations are validated
        # entry by entry so that a failure can name the offending key.
        if not isinstance(observation_space, Dict):
            _check_obs(obs, observation_space, method_name)
            return

        assert isinstance(
            obs, dict
        ), f"The observation returned by `{method_name}()` must be a dictionary"
        for key, sub_space in observation_space.spaces.items():
            try:
                _check_obs(obs[key], sub_space, method_name)
            except AssertionError as e:
                # Chain explicitly so the original failure stays attached as the cause.
                raise AssertionError(f"Error while checking key={key}: " + str(e)) from e

    # because env inherits from gym.Env, we assume that `reset()` and `step()` methods exists
    _check_observation(env.reset(), "reset")

    # Sample a random action
    action = action_space.sample()
    data = env.step(action)

    assert (
        len(data) == 4
    ), "The `step()` method must return four values: obs, reward, done, info"

    # Unpack
    obs, reward, done, info = data

    _check_observation(obs, "step")

    # We also allow int because the reward will be cast to float
    assert isinstance(
        reward, (float, int, np.float32)
    ), "The reward returned by `step()` must be a float"
    assert isinstance(done, bool), "The `done` signal must be a boolean"
    assert isinstance(
        info, dict
    ), "The `info` returned by `step()` must be a python dictionary"
def get_obs_batch(batch_size: int, obs_space: Space, num_tasks: int) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Draw `batch_size` observations from `obs_space`, append a task vector produced by
    `one_hot_tensor(num_tasks)` to each, and return the stacked batch together with
    the task index recovered from every row. `obs_space` must be one-dimensional.
    """
    # One throw-away sample is used to learn the length of a raw observation.
    sample_shape = obs_space.sample().shape
    assert len(sample_shape) == 1
    obs_len = sample_shape[0]

    # Each row is a sampled observation followed by its task vector.
    rows = [
        torch.cat([torch.Tensor(obs_space.sample()), one_hot_tensor(num_tasks)])
        for _ in range(batch_size)
    ]
    obs = torch.stack(rows)

    # Positions of the non-zero entries in the task part of each row; the
    # assertion requires exactly one such entry per row, in row order.
    hot_positions = obs[:, obs_len:].nonzero()
    assert hot_positions[:, 0].tolist() == list(range(batch_size))

    return obs, hot_positions[:, 1]
def preprocess_obs(obs: th.Tensor, device, observation_space: spaces.Space, normalize_images: bool = True) -> th.Tensor:
    """
    Turn a raw observation into the tensor that is fed to a neural network.

    Image observations are divided by 255 (values end up in [0, 1]) when
    ``normalize_images`` is set, discrete observations become one hot vectors,
    and ``JsonGraph`` observations go through the space's own converter.

    :param obs: Observation
    :param device: Target passed to ``.to(...)`` for the resulting tensor
    :param observation_space: Space the observation belongs to
    :param normalize_images: Whether to normalize images or not (True by default)
    :return: The preprocessed observation
    """
    # Graph observations are converted by the space itself, then moved to the device.
    if isinstance(observation_space, JsonGraph):
        graph_obs = observation_space.converter(obs)
        return graph_obs.to(device)

    obs = th.as_tensor(obs).to(device)

    if isinstance(observation_space, spaces.Box):
        if is_image_space(observation_space) and normalize_images:
            return obs.float() / 255.0
        return obs.float()

    if isinstance(observation_space, spaces.Discrete):
        # One hot encoding and convert to float to avoid errors
        return F.one_hot(obs.long(), num_classes=observation_space.n).float()

    if isinstance(observation_space, spaces.MultiDiscrete):
        # Tensor concatenation of one hot encodings of each Categorical sub-space
        columns = th.split(obs.long(), 1, dim=1)
        encoded = [
            F.one_hot(column.long(), num_classes=int(observation_space.nvec[idx])).float()
            for idx, column in enumerate(columns)
        ]
        return th.cat(encoded, dim=-1).view(obs.shape[0], sum(observation_space.nvec))

    if isinstance(observation_space, spaces.MultiBinary):
        return obs.float()

    raise NotImplementedError(f"Preprocessing not implemented for {observation_space}")
def _check_obs(obs, observation_space: spaces.Space, method_name: str):
    """Validate an observation, recursing into Tuple and Dict spaces.

    Membership in the space is asserted first, then the Python type of the
    observation is asserted according to the kind of space.

    Args:
        obs: The observation to check
        observation_space: The observation space of the observation
        method_name: The method name that generated the observation
    """
    pre = f"The observation returned by the `{method_name}()` method"
    array_like_spaces = (spaces.Box, spaces.MultiBinary, spaces.MultiDiscrete)

    assert observation_space.contains(
        obs
    ), f"{pre} is not contained with the observation space ({observation_space})"

    if isinstance(observation_space, spaces.Discrete):
        assert isinstance(
            obs, int
        ), f"The observation returned by `{method_name}()` method must be an int, actually {type(obs)}"
        return

    if isinstance(observation_space, array_like_spaces):
        assert isinstance(
            obs, np.ndarray
        ), f"The observation returned by `{method_name}()` method must be a numpy array, actually {type(obs)}"
        return

    if isinstance(observation_space, spaces.Tuple):
        assert isinstance(
            obs, tuple
        ), f"The observation returned by the `{method_name}()` method must be a tuple, actually {type(obs)}"
        # Check every element against the sub-space at the same position.
        for element, element_space in zip(obs, observation_space.spaces):
            _check_obs(element, element_space, method_name)
        return

    if isinstance(observation_space, spaces.Dict):
        assert isinstance(
            obs, dict
        ), f"The observation returned by the `{method_name}()` method must be a dict, actually {type(obs)}"
        # Check every entry against the sub-space registered under the same key.
        for name in observation_space.keys():
            _check_obs(obs[name], observation_space[name], method_name)
def check_run(env: gym.Env, action_space: spaces.Space):
    """Check that the env keeps `steps_in_run` in sync with the steps actually taken.

    The env is reset and driven with random actions for three runs of at most
    100 steps each. A run stops as soon as `step()` reports `done`.

    Args:
        env: The environment to drive; must expose a `steps_in_run` attribute
        action_space: The space the random actions are sampled from

    Raises:
        AssertionError: If `env.steps_in_run` differs from the number of steps
            taken in the current run
    """
    num_runs = 3
    time_steps = 100
    for _ in range(num_runs):
        env.reset()
        for steps_taken in range(1, time_steps + 1):
            action = action_space.sample()
            _, _, done, _ = env.step(action)
            # Truthiness instead of `is True`, so a numpy boolean also ends the run.
            if done:
                assert steps_taken == env.steps_in_run, \
                    "`steps_in_run` must equal the number of steps taken when the episode is done"
                break
        else:
            # The run used all of its steps without the env reporting `done`.
            assert env.steps_in_run == time_steps, \
                "`steps_in_run` must equal the number of steps taken when the episode did not finish"
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build the model: a DQN torch model preceded by a small convolutional filter.

    The base ``DQNTorchModel`` is initialised with a flat observation space of
    ``obs_space.shape[0] * obs_space.shape[1] * 4`` entries, and ``self.convfilter``
    holds the convolutional stack created here.

    Args:
        obs_space: Observation space; its first two shape entries are read
        action_space: Forwarded unchanged to ``DQNTorchModel.__init__``
        num_outputs: Forwarded unchanged to ``DQNTorchModel.__init__``
        model_config: Forwarded unchanged to ``DQNTorchModel.__init__``
        name: Forwarded unchanged to ``DQNTorchModel.__init__``
    """
    # Number of features announced to the base model and expected by the linear layer.
    flat_size = obs_space.shape[0] * obs_space.shape[1] * 4

    DQNTorchModel.__init__(
        self,
        # `np.float` was removed from NumPy; `np.float64` is the dtype it resolved to.
        Space(shape=(flat_size, ), dtype=np.float64),
        action_space,
        num_outputs,
        model_config,
        name)
    # NOTE(review): this runs after the base initialiser; if DQNTorchModel already
    # registered sub-modules, re-initialising nn.Module would reset them - confirm the order.
    nn.Module.__init__(self)

    # NOTE(review): 21 input channels is hard-coded - presumably the channel count
    # of the observation; verify. With kernel_size=3 and padding=2 each convolution
    # grows both spatial dimensions by 2, so confirm that `flat_size` matches the
    # flattened output fed to the linear layer.
    self.convfilter = nn.Sequential(
        nn.Conv2d(21, 8, kernel_size=3, padding=2),
        nn.BatchNorm2d(8),
        nn.ReLU(),
        nn.Conv2d(8, 4, kernel_size=3, padding=2),
        nn.BatchNorm2d(4),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(flat_size, 32),
        nn.ReLU(),
    )
def check_reset_step(env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space):
    """Exercise reset() and step() and make sure the observations they return change.

    Two consecutive resets must differ on the compared observation slices, and each
    of three random steps must differ from the second reset observation on the
    compared slices.
    """
    # (slice of the observation, failure message) pairs, checked in order.
    reset_checks = (
        (slice(1, 8), "The infos of the observation must differ after reset the webot env."),
        (slice(10, None), "The infos of the lidar data must differ after reset the webot env."),
    )
    step_checks = (
        (slice(0, 3), "The information of observation must be updated after the first action"),
        (slice(6, 9), "The information of observation must be updated after the first action"),
        (slice(10, None), "The information of lidar data must be updated after the first action"),
    )

    first_reset = env.reset()
    _check_obs(first_reset, observation_space, 'reset')
    second_reset = env.reset()
    for window, message in reset_checks:
        assert (first_reset[window] != second_reset[window]).any(), message

    for _ in range(3):
        stepped, _, _, _ = env.step(action_space.sample())
        _check_obs(stepped, observation_space, 'step')
        # Every step is compared against the observation of the second reset.
        for window, message in step_checks:
            assert (stepped[window] != second_reset[window]).any(), message
def get_action_dim(action_space: spaces.Space) -> int:
    """
    Return the number of values that make up one action of the given space.

    :param action_space: Action space to inspect
    :return: Dimension of the action space
    :raises NotImplementedError: If the kind of space is not handled
    """
    if isinstance(action_space, spaces.Box):
        # Product of all shape entries
        return int(np.prod(action_space.shape))

    if isinstance(action_space, spaces.Discrete):
        # Action is an int
        return 1

    if isinstance(action_space, spaces.MultiDiscrete):
        # Number of discrete actions
        return int(len(action_space.nvec))

    if isinstance(action_space, spaces.MultiBinary):
        # Number of binary actions
        return int(action_space.n)

    if isinstance(action_space, HybridBase):
        # Hybrid spaces report their own dimension
        return action_space.get_dimension()

    raise NotImplementedError(f"{action_space} action space is not supported")
def _check_returned_values(env: gym.Env, observation_space: spaces.Space, action_space: spaces.Space) -> None:
    """
    Validate what the env hands back from one `.reset()` call followed by one
    `.step()` call with a randomly sampled action.
    """
    # because env inherits from gym.Env, we assume that `reset()` and `step()` methods exists
    initial_obs = env.reset()
    _check_obs(initial_obs, observation_space, "reset")

    # Take one step with a random action
    step_result = env.step(action_space.sample())
    assert len(step_result) == 4, "The `step()` method must return four values: obs, reward, done, info"

    obs, reward, done, info = step_result
    _check_obs(obs, observation_space, "step")

    # We also allow int because the reward will be cast to float
    reward_types = (float, int)
    assert isinstance(reward, reward_types), "The reward returned by `step()` must be a float"
    assert isinstance(done, bool), "The `done` signal must be a boolean"
    assert isinstance(info, dict), "The `info` returned by `step()` must be a python dictionary"

    if not isinstance(env, gym.GoalEnv):
        return

    # For a GoalEnv, the keys are checked at reset
    assert reward == env.compute_reward(obs["achieved_goal"], obs["desired_goal"], info)
def reward_space(self) -> Space:
    """Return a freshly built ``Space`` of shape ``(1,)`` and dtype ``np.float32`` for the reward."""
    single_value_shape = (1, )
    return Space(shape=single_value_shape, dtype=np.float32)