def _wrap_env(self, env: GymEnv) -> VecEnv:
    """
    Make sure the environment is vectorized and, for image observations,
    wrapped in a ``VecTransposeImage``.

    :param env: the environment to wrap (a plain env or already a ``VecEnv``)
    :return: the wrapped vectorized environment
    """
    if not isinstance(env, VecEnv):
        if self.verbose >= 1:
            print("Wrapping the env in a DummyVecEnv.")
        single_env = env
        env = DummyVecEnv([lambda: single_env])

    # Nothing more to do for non-image observations, or when the outermost
    # wrapper is already a VecTransposeImage.
    if not is_image_space(env.observation_space) or isinstance(env, VecTransposeImage):
        return env

    if self.verbose >= 1:
        print("Wrapping the env in a VecTransposeImage.")
    return VecTransposeImage(env)
def transpose_space(observation_space: spaces.Box) -> spaces.Box:
    """
    Build the transposed counterpart of an image observation space by moving
    the last axis of its shape to the front.

    The returned space always uses ``low=0`` and ``high=255`` and keeps the
    dtype of the input space.

    :param observation_space: image space with a 3-element shape
    :return: a new ``spaces.Box`` whose shape is ``(last, first, second)``
    """
    assert is_image_space(observation_space), 'The observation space must be an image'
    first_dim, second_dim, n_channels = observation_space.shape
    return spaces.Box(
        low=0,
        high=255,
        shape=(n_channels, first_dim, second_dim),
        dtype=observation_space.dtype,
    )
def preprocesses_obs_for_model(
    self, observation: Union[np.ndarray, Dict[str, np.ndarray]]
) -> Tuple[Union[np.ndarray, Dict[str, np.ndarray]], bool]:
    """
    Preprocess an observation, both for prediction and for action evaluation.

    Image observations go through ``maybe_transpose``, everything else is
    converted with ``np.array``; a leading batch dimension is then added
    when needed via ``reshape``.

    :param observation: the input observation (array or dict of arrays)
    :return: the reshaped observation (same kind of container as the input)
        and whether the observation was detected as vectorized
    """
    if isinstance(observation, dict):
        # need to copy the dict as the dict in VecFrameStack will become a torch tensor
        observation = copy.deepcopy(observation)
        vectorized_env = False
        for key, raw_obs in observation.items():
            sub_space = self.observation_space.spaces[key]
            if is_image_space(sub_space):
                sub_obs = maybe_transpose(raw_obs, sub_space)
            else:
                sub_obs = np.array(raw_obs)
            # Once one entry is found to be vectorized, stop checking the others
            if not vectorized_env:
                vectorized_env = is_vectorized_observation(sub_obs, sub_space)
            # Add batch dimension if needed
            observation[key] = sub_obs.reshape((-1,) + self.observation_space[key].shape)
        return observation, vectorized_env

    if is_image_space(self.observation_space):
        # Handle the different cases for images
        # as PyTorch use channel first format
        observation = maybe_transpose(observation, self.observation_space)
    else:
        observation = np.array(observation)

    vectorized_env = is_vectorized_observation(observation, self.observation_space)
    # Add batch dimension if needed
    observation = observation.reshape((-1,) + self.observation_space.shape)
    return observation, vectorized_env
def create_env(n_envs, eval_env=False, no_log=False):
    """
    Create the environment and wrap it if necessary.

    Relies on names defined outside this function (``hyperparams``,
    ``env_kwargs``, ``save_path``, ``env_id``, ``args``, ``env_wrapper``).
    Note: the ``VecNormalize`` wrapping step is disabled in this variant
    (it was commented out in the original code).

    :param n_envs: (int)
    :param eval_env: (bool) Whether is it an environment used for evaluation or not
    :param no_log: (bool) Do not log training when doing hyperparameter optim
        (issue with writing the same file)
    :return: (Union[gym.Env, VecEnv])
    """
    # Do not log eval env (issue with writing the same file)
    skip_logging = eval_env or no_log
    log_dir = None if skip_logging else save_path

    # A single env always gets rank 0, otherwise one rank per env.
    # On most env, SubprocVecEnv does not help and is quite memory hungry,
    # hence DummyVecEnv is used in both cases.
    ranks = [0] if n_envs == 1 else range(n_envs)
    env = DummyVecEnv([
        make_env(env_id, rank, args.seed, wrapper_class=env_wrapper, log_dir=log_dir, env_kwargs=env_kwargs)
        for rank in ranks
    ])

    # Optional Frame-stacking
    n_stack = hyperparams.get('frame_stack', False)
    if n_stack:
        env = VecFrameStack(env, n_stack)
        print(f"Stacking {n_stack} frames")

    if not is_image_space(env.observation_space):
        return env

    if args.verbose > 0:
        print("Wrapping into a VecTransposeImage")
    return VecTransposeImage(env)
def create_env(n_envs, eval_env=False):
    """
    Create the environment and wrap it if necessary.

    Relies on names defined outside this function (``hyperparams``,
    ``env_kwargs``, ``save_path``, ``env_id``, ``args``, ``env_wrapper``,
    ``normalize``, ``normalize_kwargs``).

    :param n_envs: (int)
    :param eval_env: (bool) Whether is it an environment used for evaluation or not
    :return: (Union[gym.Env, VecEnv])
    """
    # Do not log eval env (issue with writing the same file)
    log_dir = save_path if not eval_env else None

    # A single env always gets rank 0, otherwise one rank per env.
    # On most env, SubprocVecEnv does not help and is quite memory hungry,
    # hence DummyVecEnv is used in both cases.
    ranks = [0] if n_envs == 1 else range(n_envs)
    env_fns = [
        make_env(env_id, rank, args.seed, wrapper_class=env_wrapper, log_dir=log_dir, env_kwargs=env_kwargs)
        for rank in ranks
    ]
    env = DummyVecEnv(env_fns)

    if normalize:
        if args.verbose > 0:
            if len(normalize_kwargs) > 0:
                message = f"Normalization activated: {normalize_kwargs}"
            else:
                message = "Normalizing input and reward"
            print(message)
        env = VecNormalize(env, **normalize_kwargs)

    # Optional Frame-stacking
    n_stack = hyperparams.get('frame_stack', False)
    if n_stack:
        env = VecFrameStack(env, n_stack)
        print(f"Stacking {n_stack} frames")

    if not is_image_space(env.observation_space):
        return env

    if args.verbose > 0:
        print("Wrapping into a VecTransposeImage")
    return VecTransposeImage(env)
def __init__(self, venv: VecEnv, skip: bool = False):
    """
    Wrap ``venv`` and expose a transposed observation space.

    :param venv: vectorized env whose observation space is an image space
        or a dictionary space
    :param skip: when True, the wrapped env is passed through without
        building a transposed observation space
    """
    wrapped_space = venv.observation_space
    is_dict_space = isinstance(wrapped_space, spaces.dict.Dict)
    assert (
        is_image_space(wrapped_space) or is_dict_space
    ), "The observation space must be an image or dictionary observation space"

    self.skip = skip
    # Do nothing
    if skip:
        super(VecTransposeImage, self).__init__(venv)
        return

    if not is_dict_space:
        observation_space = self.transpose_space(wrapped_space)
    else:
        self.image_space_keys = []
        # Work on a copy so the wrapped env's space is left untouched
        observation_space = deepcopy(wrapped_space)
        for key, subspace in observation_space.spaces.items():
            if not is_image_space(subspace):
                continue
            # Keep track of which keys should be transposed later
            self.image_space_keys.append(key)
            observation_space.spaces[key] = self.transpose_space(subspace, key)

    super(VecTransposeImage, self).__init__(venv, observation_space=observation_space)
def obs_to_tensor(self, observation: Union[np.ndarray, Dict[str, np.ndarray]]) -> Tuple[th.Tensor, bool]:
    """
    Convert an input observation to a PyTorch tensor that can be fed to a model.

    Image observations go through ``maybe_transpose``, other observations
    are converted with ``np.array``; a batch dimension is added when needed
    before the final call to ``obs_as_tensor``.

    :param observation: the input observation
    :return: The observation as PyTorch tensor
        and whether the observation is vectorized or not
    """
    if isinstance(observation, dict):
        vectorized_env = False
        # need to copy the dict as the dict in VecFrameStack will become a torch tensor
        observation = copy.deepcopy(observation)
        for key, raw_obs in observation.items():
            sub_space = self.observation_space.spaces[key]
            sub_obs = maybe_transpose(raw_obs, sub_space) if is_image_space(sub_space) else np.array(raw_obs)
            # Once one entry is found to be vectorized, stop checking the others
            if not vectorized_env:
                vectorized_env = is_vectorized_observation(sub_obs, sub_space)
            # Add batch dimension if needed
            observation[key] = sub_obs.reshape((-1,) + self.observation_space[key].shape)
    else:
        if is_image_space(self.observation_space):
            # Handle the different cases for images
            # as PyTorch use channel first format
            observation = maybe_transpose(observation, self.observation_space)
        else:
            observation = np.array(observation)
        vectorized_env = is_vectorized_observation(observation, self.observation_space)
        # Add batch dimension if needed
        observation = observation.reshape((-1,) + self.observation_space.shape)

    return obs_as_tensor(observation, self.device), vectorized_env
def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False) -> VecEnv:
    """
    Create the environment and wrap it if necessary.

    The env is built with ``make_vec_env`` and then, in this order:
    passed to ``self._maybe_normalize``, optionally frame-stacked, and
    optionally wrapped in a ``VecTransposeImage``.

    :param n_envs: number of environments, forwarded to ``make_vec_env``
    :param eval_env: Whether is it an environment used for evaluation or not
    :param no_log: Do not log training when doing hyperparameter optim
        (issue with writing the same file)
    :return: the vectorized environment, with appropriate wrappers
    """
    # Do not log eval env (issue with writing the same file)
    skip_logging = eval_env or no_log
    log_dir = None if skip_logging else self.save_path

    # Special case for GoalEnvs: log success rate too
    log_success = "Neck" in self.env_id or self.is_robotics_env(self.env_id) or "parking-v0" in self.env_id
    monitor_kwargs = {"info_keywords": ("is_success",)} if log_success else {}

    # On most env, SubprocVecEnv does not help and is quite memory hungry
    # therefore we use DummyVecEnv by default
    env = make_vec_env(
        env_id=self.env_id,
        n_envs=n_envs,
        seed=self.seed,
        env_kwargs=self.env_kwargs,
        monitor_dir=log_dir,
        wrapper_class=self.env_wrapper,
        vec_env_cls=self.vec_env_class,
        vec_env_kwargs=self.vec_env_kwargs,
        monitor_kwargs=monitor_kwargs,
    )

    # Wrap the env into a VecNormalize wrapper if needed
    # and load saved statistics when present
    env = self._maybe_normalize(env, eval_env)

    # Optional Frame-stacking
    n_stack = self.frame_stack
    if n_stack is not None:
        env = VecFrameStack(env, n_stack)
        if self.verbose > 0:
            print(f"Stacking {n_stack} frames")

    # Wrap if needed to re-order channels
    # (switch from channel last to channel first convention)
    if not is_image_space(env.observation_space) or is_image_space_channels_first(env.observation_space):
        return env

    if self.verbose > 0:
        print("Wrapping into a VecTransposeImage")
    return VecTransposeImage(env)
def _wrap_env(env: GymEnv, verbose: int = 0) -> VecEnv:
    """
    Wrap the environment with the wrappers it needs: a ``DummyVecEnv`` when
    it is not yet a ``VecEnv``, a ``VecTransposeImage`` for image
    observations that are not already wrapped with one, and an
    ``ObsDictWrapper`` for dictionary observation spaces.

    :param env: the environment to wrap
    :param verbose: messages are printed when this is >= 1
    :return: the wrapped vectorized environment
    """
    if not isinstance(env, VecEnv):
        if verbose >= 1:
            print("Wrapping the env in a DummyVecEnv.")
        base_env = env
        env = DummyVecEnv([lambda: base_env])

    needs_transpose = is_image_space(env.observation_space) and not is_wrapped(env, VecTransposeImage)
    if needs_transpose:
        if verbose >= 1:
            print("Wrapping the env in a VecTransposeImage.")
        env = VecTransposeImage(env)

    # check if wrapper for dict support is needed when using HER
    if isinstance(env.observation_space, gym.spaces.dict.Dict):
        env = ObsDictWrapper(env)

    return env
def transpose_space(observation_space: spaces.Box, key: str = "") -> spaces.Box:
    """
    Transpose an observation space (re-order channels) from channel-last
    ``(height, width, channels)`` to ``(channels, height, width)``.

    The returned space always uses ``low=0`` and ``high=255`` and keeps the
    dtype of the input space.

    :param observation_space: the channel-last image space to transpose
    :param key: In case of dictionary space, the key of the observation space
        (only used in the assertion message).
    :return: the transposed space
    """
    # Sanity checks
    assert is_image_space(observation_space), "The observation space must be an image"
    channels_first = is_image_space_channels_first(observation_space)
    assert not channels_first, f"The observation space {key} must follow the channel last convention"

    n_rows, n_cols, n_channels = observation_space.shape
    return spaces.Box(
        low=0,
        high=255,
        shape=(n_channels, n_rows, n_cols),
        dtype=observation_space.dtype,
    )
def test_image_space_checks():
    """Check ``is_image_space`` and ``is_image_space_channels_first`` on a set of hand-built spaces."""
    rejected_spaces = [
        spaces.Box(0, 1, shape=(10,)),
        # Not uint8
        spaces.Box(0, 255, shape=(10, 10, 3)),
        # Not correct shape
        spaces.Box(0, 255, shape=(10, 10), dtype=np.uint8),
        # Not correct low/high
        spaces.Box(0, 10, shape=(10, 10, 3), dtype=np.uint8),
        # Not correct space
        spaces.Discrete(n=10),
    ]
    for not_image_space in rejected_spaces:
        assert not is_image_space(not_image_space)

    an_image_space = spaces.Box(0, 255, shape=(10, 10, 3), dtype=np.uint8)
    assert is_image_space(an_image_space)

    an_image_space_with_odd_channels = spaces.Box(0, 255, shape=(10, 10, 5), dtype=np.uint8)
    assert is_image_space(an_image_space_with_odd_channels)
    # Should not pass if we check if channels are valid for an image
    assert not is_image_space(an_image_space_with_odd_channels, check_channels=True)

    # Test if channel-check works
    def make_image_box(shape):
        return spaces.Box(0, 255, shape=shape, dtype=np.uint8)

    channel_first_space = make_image_box((3, 10, 10))
    assert is_image_space_channels_first(channel_first_space)

    channel_last_space = make_image_box((10, 10, 3))
    assert not is_image_space_channels_first(channel_last_space)

    channel_mid_space = make_image_box((10, 3, 10))
    # Should raise a warning
    with pytest.warns(Warning):
        assert not is_image_space_channels_first(channel_mid_space)
def check_env(env: GymEnv, observation_space: gym.spaces.Space, action_space: gym.spaces.Space):
    """
    Checks the validity of the environment to load vs the one used for training.
    Checked parameters:
    - observation_space
    - action_space

    An image observation space is also accepted when ``observation_space``
    equals the transposed version of the env's space.

    :param env: (GymEnv)
    :param observation_space: (gym.spaces.Space)
    :param action_space: (gym.spaces.Space)
    :raises ValueError: if the observation or action spaces do not match
    """
    if observation_space != env.observation_space:
        # Special cases for images that need to be transposed
        matches_once_transposed = is_image_space(env.observation_space) and (
            observation_space == VecTransposeImage.transpose_space(env.observation_space)
        )
        if not matches_once_transposed:
            raise ValueError(f'Observation spaces do not match: {observation_space} != {env.observation_space}')

    if action_space != env.action_space:
        raise ValueError(f'Action spaces do not match: {action_space} != {env.action_space}')
def _wrap_env(self, env: GymEnv) -> VecEnv:
    """
    This overrides _wrap_env from stable_baselines3.common.base_class.BaseAlgorithm.

    The vectorized environment (a ``DummyVecEnv`` is created when ``env`` is
    not already a ``VecEnv``) gets wrapped inside a ``VecNormalize``
    environment to normalize rewards and observations. Image observation
    spaces are then wrapped in a ``VecTransposeImage``, unless the env
    passed in was already a ``VecTransposeImage``.

    :param env: the environment to wrap
    :return: the wrapped vectorized environment
    """
    if not isinstance(env, VecEnv):
        if self.verbose >= 1:
            print("Wrapping the env in a DummyVecEnv.")
        single_env = env
        env = DummyVecEnv([lambda: single_env])

    # This must be evaluated BEFORE adding VecNormalize: afterwards the
    # outermost wrapper is a VecNormalize, so the isinstance check would
    # always be False and an already-transposed env would be transposed twice.
    already_transposed = isinstance(env, VecTransposeImage)

    # Automatically normalize the input features and reward
    # norm_obs/norm_reward: True if obs/rew should be normalized
    # gamma: discount factor (hard-coded here)
    # clip_obs / clip_reward are left at the VecNormalize defaults
    env = VecNormalize(env, training=True, norm_obs=True, norm_reward=True, gamma=0.99)

    if is_image_space(env.observation_space) and not already_transposed:
        if self.verbose >= 1:
            print("Wrapping the env in a VecTransposeImage.")
        env = VecTransposeImage(env)

    return env
def __init__(self, venv: VecEnv, n_stack: int, channels_order: Optional[str] = None):
    """
    Set up frame stacking on top of ``venv``.

    :param venv: the vectorized env to wrap; its observation space must be
        a ``spaces.Box``
    :param n_stack: number of observations to stack
    :param channels_order: ``"first"`` or ``"last"`` to force the stacking
        axis; when None it is detected for image spaces and defaults to the
        last axis otherwise
    """
    self.venv = venv
    self.n_stack = n_stack

    wrapped_obs_space = venv.observation_space
    assert isinstance(
        wrapped_obs_space, spaces.Box
    ), "VecFrameStack only work with gym.spaces.Box observation space"

    if channels_order is not None:
        assert channels_order in {
            "last", "first"
        }, "`channels_order` must be one of following: 'last', 'first'"
        self.channels_first = channels_order == "first"
    elif is_image_space(wrapped_obs_space):
        # Detect channel location automatically for images
        self.channels_first = is_image_space_channels_first(wrapped_obs_space)
    else:
        # Default behavior for non-image space, stack on the last axis
        self.channels_first = False

    if self.channels_first:
        # stack_dimension includes the vec-env dimension (first), repeat_axis does not
        self.stack_dimension, repeat_axis = 1, 0
    else:
        self.stack_dimension, repeat_axis = -1, -1

    low = np.repeat(wrapped_obs_space.low, self.n_stack, axis=repeat_axis)
    high = np.repeat(wrapped_obs_space.high, self.n_stack, axis=repeat_axis)
    self.stackedobs = np.zeros((venv.num_envs,) + low.shape, low.dtype)

    stacked_space = spaces.Box(low=low, high=high, dtype=venv.observation_space.dtype)
    VecEnvWrapper.__init__(self, venv, observation_space=stacked_space)
def compute_stacking(
    num_envs: int,
    n_stack: int,
    observation_space: spaces.Box,
    channels_order: Optional[str] = None,
) -> Tuple[bool, int, np.ndarray, int]:
    """
    Calculates the parameters in order to stack observations

    :param num_envs: Number of environments in the stack
    :param n_stack: The number of observations to stack
    :param observation_space: The observation space
    :param channels_order: The order of the channels (``"first"``,
        ``"last"`` or None for automatic detection on image spaces)
    :return: tuple of channels_first, stack_dimension, stackedobs, repeat_axis
    """
    if channels_order is not None:
        assert channels_order in {
            "last",
            "first",
        }, "`channels_order` must be one of following: 'last', 'first'"
        channels_first = channels_order == "first"
    elif is_image_space(observation_space):
        # Detect channel location automatically for images
        channels_first = is_image_space_channels_first(observation_space)
    else:
        # Default behavior for non-image space, stack on the last axis
        channels_first = False

    if channels_first:
        # stack_dimension includes the vec-env dimension (first), repeat_axis does not
        stack_dimension, repeat_axis = 1, 0
    else:
        stack_dimension, repeat_axis = -1, -1

    # The repeated lower bound is only used for its shape and dtype
    repeated_low = np.repeat(observation_space.low, n_stack, axis=repeat_axis)
    stackedobs = np.zeros((num_envs,) + repeated_low.shape, repeated_low.dtype)
    return channels_first, stack_dimension, stackedobs, repeat_axis
def check_for_correct_spaces(env: GymEnv, observation_space: gym.spaces.Space, action_space: gym.spaces.Space) -> None:
    """
    Checks that the environment has same spaces as provided ones. Used by BaseAlgorithm to check if
    spaces match after loading the model with given env.
    Checked parameters:
    - observation_space
    - action_space

    An image observation space is also accepted when ``observation_space``
    equals the transposed version of the env's space.

    :param env: Environment to check for valid spaces
    :param observation_space: Observation space to check against
    :param action_space: Action space to check against
    :raises ValueError: if the observation or action spaces do not match
    """
    env_obs_space = env.observation_space
    if observation_space != env_obs_space:
        # Special cases for images that need to be transposed
        if not is_image_space(env_obs_space) or not (
            observation_space == VecTransposeImage.transpose_space(env_obs_space)
        ):
            raise ValueError(f"Observation spaces do not match: {observation_space} != {env.observation_space}")

    if action_space != env.action_space:
        raise ValueError(f"Action spaces do not match: {action_space} != {env.action_space}")
def __init__(self, observation_space: gym.spaces.Dict, cnn_output_dim: int = 256):
    """
    Build one sub-extractor per key of the dictionary observation space:
    a ``NatureCNN`` for image subspaces and ``nn.Flatten`` for the others.

    :param observation_space: the dictionary observation space
    :param cnn_output_dim: ``features_dim`` given to each ``NatureCNN``
    """
    # TODO we do not know features-dim here before going over all the items,
    # so put something there. This is dirty!
    super(CombinedExtractor, self).__init__(observation_space, features_dim=1)

    per_key_extractors = {}
    concat_size = 0
    for key, subspace in observation_space.spaces.items():
        if is_image_space(subspace):
            per_key_extractors[key] = NatureCNN(subspace, features_dim=cnn_output_dim)
            concat_size += cnn_output_dim
            continue
        # The observation key is a vector, flatten it if needed
        per_key_extractors[key] = nn.Flatten()
        concat_size += get_flattened_obs_dim(subspace)

    self.extractors = nn.ModuleDict(per_key_extractors)

    # Update the features dim manually
    self._features_dim = concat_size
def predict(
    self,
    observation: np.ndarray,
    partner_idx: int = 0,
    state: Optional[np.ndarray] = None,
    mask: Optional[np.ndarray] = None,
    deterministic: bool = False,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Get the policy action and state from an observation (and optional state).
    Includes sugar-coating to handle different observations (e.g. re-ordering image channels).

    :param observation: (np.ndarray) the input observation
    :param partner_idx: (int) forwarded as-is to ``self._predict``
    :param state: (Optional[np.ndarray]) The last states (can be None, used in recurrent policies)
    :param mask: (Optional[np.ndarray]) The last masks (can be None, used in recurrent policies);
        not used by this implementation
    :param deterministic: (bool) Whether or not to return deterministic actions.
    :return: (Tuple[np.ndarray, Optional[np.ndarray]]) the model's action and the next state
        (used in recurrent policies); ``state`` is returned unchanged
    :raises ValueError: if ``state`` is given but the observation is not vectorized
    """
    # TODO (GH/1): add support for RNN policies
    # if state is None:
    #     state = self.initial_state
    # if mask is None:
    #     mask = [False for _ in range(self.n_envs)]
    observation = np.array(observation)

    def fits_obs_space(array):
        # True when the array has the space's shape, with or without a leading batch axis
        return array.shape == self.observation_space.shape or array.shape[1:] == self.observation_space.shape

    # Handle the different cases for images
    # as PyTorch use channel first format
    if is_image_space(self.observation_space) and not fits_obs_space(observation):
        # Try to re-order the channels
        transposed = VecTransposeImage.transpose_image(observation)
        if fits_obs_space(transposed):
            observation = transposed

    vectorized_env = is_vectorized_observation(observation, self.observation_space)

    batched_obs = observation.reshape((-1,) + self.observation_space.shape)
    obs_tensor = th.as_tensor(batched_obs).to(self.device)
    with th.no_grad():
        actions = self._predict(obs_tensor, partner_idx=partner_idx, deterministic=deterministic)
    # Convert to numpy
    actions = actions.cpu().numpy()

    if isinstance(self.action_space, gym.spaces.Box):
        if self.squash_output:
            # Rescale to proper domain when using squashing
            actions = self.unscale_action(actions)
        else:
            # Actions could be on arbitrary scale, so clip the actions to avoid
            # out of bound error (e.g. if sampling from a Gaussian distribution)
            actions = np.clip(actions, self.action_space.low, self.action_space.high)

    if vectorized_env:
        return actions, state

    if state is not None:
        raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
    # Drop the batch dimension that was added for the single observation
    return actions[0], state
def __init__(self,
             observation_space: gym.Space,
             obs_unwrapper_function: Callable,
             obs_space_dict: Dict[str, gym.Space],
             normalize_images: bool,
             features_dim: int = 20,
             cnn_extractor_class: BaseFeaturesExtractor = NatureCNN,
             cnn_feature_dim: int = 12,
             mlp_net_arch: Iterable = (4, ),
             mlp_feature_dim: int = 6,
             embedding_dim: int = 6):
    """
    Build the sub-extractors for a nested observation space: one CNN per
    space listed under 'CNN', a single MLP shared by all spaces listed
    under 'MLP', one embedding table per space listed under 'EMBED', and a
    final linear projection to ``features_dim``.

    :param observation_space: forwarded to the parent constructor
    :param obs_unwrapper_function: forwarded to the parent constructor
    :param obs_space_dict: dictionary of spaces the sub-extractors are
        inferred from (via ``self.recursive_space_infer``)
    :param normalize_images: forwarded to the parent constructor
    :param features_dim: output size of the projection layer
    :param cnn_extractor_class: callable used to build each CNN extractor
    :param cnn_feature_dim: feature size passed to each CNN extractor
    :param mlp_net_arch: architecture passed to ``create_mlp``
    :param mlp_feature_dim: output size passed to ``create_mlp``
    :param embedding_dim: embedding size of each embedding table
    """
    super().__init__(observation_space, obs_unwrapper_function,
                     obs_space_dict, normalize_images, features_dim)

    # This gets the string obs spaces associated with each extractor
    # They're stored in a nested _ separated string
    self.split_chars = "__"
    self.inferred_extractor_mapping = self.recursive_space_infer(obs_space_dict)
    mapping = self.inferred_extractor_mapping
    self.cnn_spaces = mapping['CNN']
    self.mlp_spaces = mapping['MLP']
    self.embed_spaces = mapping['EMBED']

    def lookup(designation):
        # Resolve a space designation string to the space it names
        return recursive_lookup_from_string(obs_space_dict, designation, self.split_chars)

    total_flattened_dim = 0

    # Create CNN extractors
    cnn_modules = []
    for designation in self.cnn_spaces:
        cnn_space = lookup(designation)
        assert is_image_space(cnn_space)
        cnn_modules.append(cnn_extractor_class(cnn_space, cnn_feature_dim))
        total_flattened_dim += cnn_feature_dim
    self.cnn_extractors = nn.ModuleList(cnn_modules)

    # Create MLP Extractor
    if len(self.mlp_spaces) > 0:
        total_mlp_dim = 0
        for designation in self.mlp_spaces:
            mlp_space = lookup(designation)
            assert isinstance(mlp_space, gym.spaces.Box)
            # assume if the space is multi-dimensional, we'll flatten it
            # before sending it to a MLP
            total_mlp_dim += int(np.prod(mlp_space.shape))
        mlp_layers = create_mlp(total_mlp_dim, mlp_feature_dim, mlp_net_arch)
        self.mlp_extractor = nn.Sequential(*mlp_layers)
        total_flattened_dim += mlp_feature_dim
    else:
        self.mlp_extractor = None

    # Create Embed tables
    if len(self.embed_spaces) > 0:
        embedding_modules = []
        for designation in self.embed_spaces:
            embed_space = lookup(designation)
            assert isinstance(embed_space, gym.spaces.Discrete)
            table = nn.Embedding(embedding_dim=embedding_dim, num_embeddings=embed_space.n)
            embedding_modules.append(table)
            total_flattened_dim += embedding_dim
        self.embedding_tables = nn.ModuleList(embedding_modules)
    else:
        self.embedding_tables = None

    self.projection_layer = nn.Linear(total_flattened_dim, features_dim)
def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False) -> VecEnv:
    """
    Create the environment and wrap it if necessary.

    The env is built through ``self.env_wrapper`` (custom, to support
    Gazebo Runtime wrapping), seeded, wrapped in a ``Monitor`` and put in a
    vectorized env. As a side effect, ``self.vec_env_class`` is set to
    ``DummyVecEnv`` when it was None.

    :param n_envs: not used by this implementation; exactly one
        environment is created
    :param eval_env: Whether is it an environment used for evaluation or not
    :param no_log: Do not log training when doing hyperparameter optim
        (issue with writing the same file)
    :return: the vectorized environment, with appropriate wrappers
    """
    # Do not log eval env (issue with writing the same file)
    skip_logging = eval_env or no_log
    log_dir = None if skip_logging else self.save_path

    # Special case for GoalEnvs: log success rate too
    log_success = "Neck" in self.env_id or self.is_robotics_env(self.env_id) or "parking-v0" in self.env_id
    monitor_kwargs = {"info_keywords": ("is_success", )} if log_success else {}

    # Note: made custom to support Gazebo Runtime wrapping
    def _init():
        env = self.env_wrapper(env=self.env_id, **self.env_kwargs)
        env.seed(self.seed)
        env.action_space.seed(self.seed)
        if log_dir is not None:
            os.makedirs(log_dir, exist_ok=True)
        return Monitor(env, filename=log_dir, **monitor_kwargs)

    if self.vec_env_class is None:
        self.vec_env_class = DummyVecEnv

    env = self.vec_env_class([_init], **self.vec_env_kwargs)

    # Wrap the env into a VecNormalize wrapper if needed
    # and load saved statistics when present
    env = self._maybe_normalize(env, eval_env)

    # Optional Frame-stacking
    n_stack = self.frame_stack
    if n_stack is not None:
        env = VecFrameStack(env, n_stack)
        if self.verbose > 0:
            print(f"Stacking {n_stack} frames")

    # Wrap if needed to re-order channels
    # (switch from channel last to channel first convention)
    needs_transpose = is_image_space(env.observation_space) and not is_image_space_channels_first(
        env.observation_space
    )
    if needs_transpose:
        if self.verbose > 0:
            print("Wrapping into a VecTransposeImage")
        env = VecTransposeImage(env)

    # check if wrapper for dict support is needed
    if self.algo == "her":
        if self.verbose > 0:
            print("Wrapping into a ObsDictWrapper")
        env = ObsDictWrapper(env)

    return env
def predict(
    self,
    observation: Union[np.ndarray, Dict[str, np.ndarray]],
    state: Optional[np.ndarray] = None,
    mask: Optional[np.ndarray] = None,
    deterministic: bool = False,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Get the policy action and state from an observation (and optional state).
    Includes sugar-coating to handle different observations (e.g. re-ordering image channels).

    :param observation: the input observation
    :param state: The last states (can be None, used in recurrent policies)
    :param mask: The last masks (can be None, used in recurrent policies);
        not used by this implementation
    :param deterministic: Whether or not to return deterministic actions.
    :return: the model's action and the next state
        (used in recurrent policies); ``state`` is returned unchanged
    :raises ValueError: if ``state`` is given but the observation is not vectorized
    """
    # TODO (GH/1): add support for RNN policies
    # if state is None:
    #     state = self.initial_state
    # if mask is None:
    #     mask = [False for _ in range(self.n_envs)]

    # Switch to eval mode (this affects batch norm / dropout)
    self.eval()

    if isinstance(observation, dict):
        vectorized_env = False
        # need to copy the dict as the dict in VecFrameStack will become a torch tensor
        observation = copy.deepcopy(observation)
        for key, raw_obs in observation.items():
            sub_space = self.observation_space.spaces[key]
            sub_obs = maybe_transpose(raw_obs, sub_space) if is_image_space(sub_space) else np.array(raw_obs)
            # Once one entry is found to be vectorized, stop checking the others
            if not vectorized_env:
                vectorized_env = is_vectorized_observation(sub_obs, sub_space)
            # Add batch dimension if needed
            observation[key] = sub_obs.reshape((-1,) + self.observation_space[key].shape)
    else:
        if is_image_space(self.observation_space):
            # Handle the different cases for images
            # as PyTorch use channel first format
            observation = maybe_transpose(observation, self.observation_space)
        else:
            observation = np.array(observation)
        vectorized_env = is_vectorized_observation(observation, self.observation_space)
        # Add batch dimension if needed
        observation = observation.reshape((-1,) + self.observation_space.shape)

    obs_tensor = obs_as_tensor(observation, self.device)
    with th.no_grad():
        actions = self._predict(obs_tensor, deterministic=deterministic)
    # Convert to numpy
    actions = actions.cpu().numpy()

    if isinstance(self.action_space, gym.spaces.Box):
        if self.squash_output:
            # Rescale to proper domain when using squashing
            actions = self.unscale_action(actions)
        else:
            # Actions could be on arbitrary scale, so clip the actions to avoid
            # out of bound error (e.g. if sampling from a Gaussian distribution)
            actions = np.clip(actions, self.action_space.low, self.action_space.high)

    if vectorized_env:
        return actions, state

    if state is not None:
        raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
    # Drop the batch dimension that was added for the single observation
    return actions[0], state
def __init__(self, venv: VecEnv):
    """
    Wrap ``venv`` and expose the transposed version of its image
    observation space.

    :param venv: vectorized env whose observation space is an image space
    """
    wrapped_space = venv.observation_space
    assert is_image_space(wrapped_space), "The observation space must be an image"

    transposed_space = self.transpose_space(wrapped_space)
    super(VecTransposeImage, self).__init__(venv, observation_space=transposed_space)
def __init__(self, observation_space: gym.spaces.Box):
    """
    Initialize the parent extractor with ``self.output_channels`` as its
    second argument, then run ``self._setup()``.

    :param observation_space: the observation space; must be an image space
    """
    # NOTE(review): output_channels is read before the parent constructor
    # runs, so it is presumably a class-level attribute -- confirm.
    n_features = self.output_channels
    super(Benchmark, self).__init__(observation_space, n_features)

    # The image check runs after the parent constructor, as in the original ordering
    space_is_image = is_image_space(observation_space)
    assert space_is_image, "This feature extraction policy must be used with image spaces."

    self._setup()
def image_transpose(env):
    """
    Wrap ``env`` in a ``VecTransposeImage`` when its observation space is an
    image space that is not already channel-first; otherwise return it as is.

    :param env: the environment to (maybe) wrap
    :return: the wrapped environment, or ``env`` itself when no wrapping is needed
    """
    if not is_image_space(env.observation_space):
        return env
    if is_image_space_channels_first(env.observation_space):
        return env
    return VecTransposeImage(env)