Code Example #1
    def _wrap_env(self, env: GymEnv) -> VecEnv:
        if not isinstance(env, VecEnv):
            if self.verbose >= 1:
                print("Wrapping the env in a DummyVecEnv.")
            env = DummyVecEnv([lambda: env])

        if is_image_space(env.observation_space) and not isinstance(env, VecTransposeImage):
            if self.verbose >= 1:
                print("Wrapping the env in a VecTransposeImage.")
            env = VecTransposeImage(env)
        return env
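
For reference, is_image_space returns True only for a uint8 Box space bounded by [0, 255] with a channel dimension. A minimal standalone check of the condition that drives the wrapping above (the shapes are illustrative, assuming stable_baselines3 and gym are installed):

import numpy as np
from gym import spaces
from stable_baselines3.common.preprocessing import is_image_space

# Channel-last Atari-style frame: recognized as an image
print(is_image_space(spaces.Box(low=0, high=255, shape=(210, 160, 3), dtype=np.uint8)))  # True
# Float vector observation: not an image
print(is_image_space(spaces.Box(low=-1.0, high=1.0, shape=(8,), dtype=np.float32)))  # False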
Code Example #2
    def transpose_space(observation_space: spaces.Box) -> spaces.Box:
        """
        Transpose an observation space (re-order channels).

        :param observation_space: (spaces.Box)
        :return: (spaces.Box)
        """
        assert is_image_space(observation_space), 'The observation space must be an image'
        height, width, channels = observation_space.shape
        new_shape = (channels, height, width)
        return spaces.Box(low=0, high=255, shape=new_shape, dtype=observation_space.dtype)
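
A quick usage sketch of transpose_space with a hypothetical 84x84 RGB space, showing the channel re-ordering:

import numpy as np
from gym import spaces
from stable_baselines3.common.vec_env import VecTransposeImage

hwc_space = spaces.Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8)
chw_space = VecTransposeImage.transpose_space(hwc_space)
print(chw_space.shape)  # (3, 84, 84): channels moved to the front for PyTorch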
Code Example #3
    def preprocesses_obs_for_model(
            self,
            observation: Union[np.ndarray, Dict[str, np.ndarray]]
    ) -> Tuple[Union[np.ndarray, Dict[str, np.ndarray]], bool]:
        """
        Preporcesses obs both for prediction and evaluate action

        :param observation
        :return: Observation as PyTorch tensor
        :return:
        """
        vectorized_env = False
        if isinstance(observation, dict):
            # need to copy the dict as the dict in VecFrameStack will become a torch tensor
            observation = copy.deepcopy(observation)
            for key, obs in observation.items():
                obs_space = self.observation_space.spaces[key]
                if is_image_space(obs_space):
                    obs_ = maybe_transpose(obs, obs_space)
                else:
                    obs_ = np.array(obs)
                vectorized_env = vectorized_env or is_vectorized_observation(obs_, obs_space)
                # Add batch dimension if needed
                observation[key] = obs_.reshape((-1,) + self.observation_space[key].shape)

        elif is_image_space(self.observation_space):
            # Handle the different cases for images
            # as PyTorch use channel first format
            observation = maybe_transpose(observation, self.observation_space)

        else:
            observation = np.array(observation)

        if not isinstance(observation, dict):
            # Dict obs need to be handled separately
            vectorized_env = is_vectorized_observation(observation, self.observation_space)
            # Add batch dimension if needed
            observation = observation.reshape((-1,) + self.observation_space.shape)

        return observation, vectorized_env
Code Example #4
    def create_env(n_envs, eval_env=False, no_log=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether it is an environment used for evaluation or not
        :param no_log: (bool) Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else save_path

        if n_envs == 1:
            env = DummyVecEnv([make_env(env_id, 0, args.seed,
                               wrapper_class=env_wrapper, log_dir=log_dir,
                               env_kwargs=env_kwargs)])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most envs, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([make_env(env_id, i, args.seed, log_dir=log_dir, env_kwargs=env_kwargs,
                                        wrapper_class=env_wrapper) for i in range(n_envs)])
        # if normalize:
        #     # Copy to avoid changing default values by reference
        #     local_normalize_kwargs = normalize_kwargs.copy()
        #     # Do not normalize reward for env used for evaluation
        #     if eval_env:
        #         if len(local_normalize_kwargs) > 0:
        #             local_normalize_kwargs['norm_reward'] = False
        #         else:
        #             local_normalize_kwargs = {'norm_reward': False}

        #     if args.verbose > 0:
        #         if len(local_normalize_kwargs) > 0:
        #             print(f"Normalization activated: {local_normalize_kwargs}")
        #         else:
        #             print("Normalizing input and reward")
        #     env = VecNormalize(env, **local_normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print(f"Stacking {n_stack} frames")

        if is_image_space(env.observation_space):
            if args.verbose > 0:
                print("Wrapping into a VecTransposeImage")
            env = VecTransposeImage(env)
        return env
Code Example #5
    def create_env(n_envs, eval_env=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether it is an environment used for evaluation or not
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env else save_path

        if n_envs == 1:
            env = DummyVecEnv([
                make_env(env_id,
                         0,
                         args.seed,
                         wrapper_class=env_wrapper,
                         log_dir=log_dir,
                         env_kwargs=env_kwargs)
            ])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most envs, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([
                make_env(env_id,
                         i,
                         args.seed,
                         log_dir=log_dir,
                         env_kwargs=env_kwargs,
                         wrapper_class=env_wrapper) for i in range(n_envs)
            ])
        if normalize:
            if args.verbose > 0:
                if len(normalize_kwargs) > 0:
                    print(f"Normalization activated: {normalize_kwargs}")
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **normalize_kwargs)

        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print(f"Stacking {n_stack} frames")

        if is_image_space(env.observation_space):
            if args.verbose > 0:
                print("Wrapping into a VecTransposeImage")
            env = VecTransposeImage(env)
        return env
Code Example #6
    def __init__(self, venv: VecEnv, skip: bool = False):
        assert is_image_space(venv.observation_space) or isinstance(
            venv.observation_space, spaces.dict.Dict
        ), "The observation space must be an image or dictionary observation space"

        self.skip = skip
        # Do nothing
        if skip:
            super(VecTransposeImage, self).__init__(venv)
            return

        if isinstance(venv.observation_space, spaces.dict.Dict):
            self.image_space_keys = []
            observation_space = deepcopy(venv.observation_space)
            for key, space in observation_space.spaces.items():
                if is_image_space(space):
                    # Keep track of which keys should be transposed later
                    self.image_space_keys.append(key)
                    observation_space.spaces[key] = self.transpose_space(space, key)
        else:
            observation_space = self.transpose_space(venv.observation_space)
        super(VecTransposeImage, self).__init__(venv, observation_space=observation_space)
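
The key selection above can be reproduced in isolation; a sketch with a hypothetical Dict space mixing an image with a state vector:

import numpy as np
from gym import spaces
from stable_baselines3.common.preprocessing import is_image_space

obs_space = spaces.Dict({
    "camera": spaces.Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8),
    "state": spaces.Box(low=-np.inf, high=np.inf, shape=(7,), dtype=np.float32),
})
# Only the image keys get transposed; other sub-spaces pass through untouched
image_keys = [key for key, space in obs_space.spaces.items() if is_image_space(space)]
print(image_keys)  # ['camera']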
Code Example #7
File: policies.py, Project: jie-jay/stable-baselines3
    def obs_to_tensor(self, observation: Union[np.ndarray, Dict[str, np.ndarray]]) -> Tuple[th.Tensor, bool]:
        """
        Convert an input observation to a PyTorch tensor that can be fed to a model.
        Includes sugar-coating to handle different observations (e.g. normalizing images).

        :param observation: the input observation
        :return: The observation as PyTorch tensor
            and whether the observation is vectorized or not
        """
        vectorized_env = False
        if isinstance(observation, dict):
            # need to copy the dict as the dict in VecFrameStack will become a torch tensor
            observation = copy.deepcopy(observation)
            for key, obs in observation.items():
                obs_space = self.observation_space.spaces[key]
                if is_image_space(obs_space):
                    obs_ = maybe_transpose(obs, obs_space)
                else:
                    obs_ = np.array(obs)
                vectorized_env = vectorized_env or is_vectorized_observation(obs_, obs_space)
                # Add batch dimension if needed
                observation[key] = obs_.reshape((-1,) + self.observation_space[key].shape)

        elif is_image_space(self.observation_space):
            # Handle the different cases for images
            # as PyTorch use channel first format
            observation = maybe_transpose(observation, self.observation_space)

        else:
            observation = np.array(observation)

        if not isinstance(observation, dict):
            # Dict obs need to be handled separately
            vectorized_env = is_vectorized_observation(observation, self.observation_space)
            # Add batch dimension if needed
            observation = observation.reshape((-1,) + self.observation_space.shape)

        observation = obs_as_tensor(observation, self.device)
        return observation, vectorized_env
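
The helpers used above can also be exercised on their own; a sketch with a hypothetical channel-first space, showing how a single channel-last frame is transposed and batched:

import numpy as np
from gym import spaces
from stable_baselines3.common.preprocessing import maybe_transpose
from stable_baselines3.common.utils import is_vectorized_observation

obs_space = spaces.Box(low=0, high=255, shape=(3, 84, 84), dtype=np.uint8)
obs = np.zeros((84, 84, 3), dtype=np.uint8)  # a single channel-last frame
obs = maybe_transpose(obs, obs_space)
print(obs.shape)  # (3, 84, 84)
print(is_vectorized_observation(obs, obs_space))  # False: no batch dimension yet
print(obs.reshape((-1,) + obs_space.shape).shape)  # (1, 3, 84, 84)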
Code Example #8
    def create_envs(self, n_envs: int, eval_env: bool = False, no_log: bool = False) -> VecEnv:
        """
        Create the environment and wrap it if necessary.

        :param n_envs:
        :param eval_env: Whether it is an environment used for evaluation or not
        :param no_log: Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: the vectorized environment, with appropriate wrappers
        """
        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else self.save_path

        monitor_kwargs = {}
        # Special case for GoalEnvs: log success rate too
        if "Neck" in self.env_id or self.is_robotics_env(self.env_id) or "parking-v0" in self.env_id:
            monitor_kwargs = dict(info_keywords=("is_success",))

        # On most envs, SubprocVecEnv does not help and is quite memory hungry
        # therefore we use DummyVecEnv by default
        env = make_vec_env(
            env_id=self.env_id,
            n_envs=n_envs,
            seed=self.seed,
            env_kwargs=self.env_kwargs,
            monitor_dir=log_dir,
            wrapper_class=self.env_wrapper,
            vec_env_cls=self.vec_env_class,
            vec_env_kwargs=self.vec_env_kwargs,
            monitor_kwargs=monitor_kwargs,
        )

        # Wrap the env into a VecNormalize wrapper if needed
        # and load saved statistics when present
        env = self._maybe_normalize(env, eval_env)

        # Optional Frame-stacking
        if self.frame_stack is not None:
            n_stack = self.frame_stack
            env = VecFrameStack(env, n_stack)
            if self.verbose > 0:
                print(f"Stacking {n_stack} frames")

        # Wrap if needed to re-order channels
        # (switch from channel last to channel first convention)
        if is_image_space(env.observation_space) and not is_image_space_channels_first(env.observation_space):
            if self.verbose > 0:
                print("Wrapping into a VecTransposeImage")
            env = VecTransposeImage(env)

        return env
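
For context, make_vec_env bundles env creation, seeding, and Monitor wrapping; a minimal standalone call might look like this (CartPole-v1 chosen arbitrarily):

from stable_baselines3.common.env_util import make_vec_env

# DummyVecEnv is the default vec_env_cls, matching the comment above
env = make_vec_env("CartPole-v1", n_envs=4, seed=0)
print(env.num_envs)  # 4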
Code Example #9
File: base_class.py, Project: kdh0429/TorchDeepMimic
    def _wrap_env(env: GymEnv, verbose: int = 0) -> VecEnv:
        if not isinstance(env, VecEnv):
            if verbose >= 1:
                print("Wrapping the env in a DummyVecEnv.")
            env = DummyVecEnv([lambda: env])

        if is_image_space(env.observation_space) and not is_wrapped(env, VecTransposeImage):
            if verbose >= 1:
                print("Wrapping the env in a VecTransposeImage.")
            env = VecTransposeImage(env)

        # check if wrapper for dict support is needed when using HER
        if isinstance(env.observation_space, gym.spaces.dict.Dict):
            env = ObsDictWrapper(env)

        return env
Code Example #10
    def transpose_space(observation_space: spaces.Box, key: str = "") -> spaces.Box:
        """
        Transpose an observation space (re-order channels).

        :param observation_space:
        :param key: In case of dictionary space, the key of the observation space.
        :return:
        """
        # Sanity checks
        assert is_image_space(observation_space), "The observation space must be an image"
        assert not is_image_space_channels_first(
            observation_space
        ), f"The observation space {key} must follow the channel last convention"
        height, width, channels = observation_space.shape
        new_shape = (channels, height, width)
        return spaces.Box(low=0, high=255, shape=new_shape, dtype=observation_space.dtype)
Code Example #11
def test_image_space_checks():
    not_image_space = spaces.Box(0, 1, shape=(10, ))
    assert not is_image_space(not_image_space)

    # Not uint8
    not_image_space = spaces.Box(0, 255, shape=(10, 10, 3))
    assert not is_image_space(not_image_space)

    # Not correct shape
    not_image_space = spaces.Box(0, 255, shape=(10, 10), dtype=np.uint8)
    assert not is_image_space(not_image_space)

    # Not correct low/high
    not_image_space = spaces.Box(0, 10, shape=(10, 10, 3), dtype=np.uint8)
    assert not is_image_space(not_image_space)

    # Not correct space
    not_image_space = spaces.Discrete(n=10)
    assert not is_image_space(not_image_space)

    an_image_space = spaces.Box(0, 255, shape=(10, 10, 3), dtype=np.uint8)
    assert is_image_space(an_image_space)

    an_image_space_with_odd_channels = spaces.Box(0,
                                                  255,
                                                  shape=(10, 10, 5),
                                                  dtype=np.uint8)
    assert is_image_space(an_image_space_with_odd_channels)
    # Should not pass if we check if channels are valid for an image
    assert not is_image_space(an_image_space_with_odd_channels,
                              check_channels=True)

    # Test if channel-check works
    channel_first_space = spaces.Box(0, 255, shape=(3, 10, 10), dtype=np.uint8)
    assert is_image_space_channels_first(channel_first_space)

    channel_last_space = spaces.Box(0, 255, shape=(10, 10, 3), dtype=np.uint8)
    assert not is_image_space_channels_first(channel_last_space)

    channel_mid_space = spaces.Box(0, 255, shape=(10, 3, 10), dtype=np.uint8)
    # Should raise a warning
    with pytest.warns(Warning):
        assert not is_image_space_channels_first(channel_mid_space)
Code Example #12
    def check_env(env: GymEnv, observation_space: gym.spaces.Space, action_space: gym.spaces.Space):
        """
        Checks the validity of the environment to load vs the one used for training.
        Checked parameters:
        - observation_space
        - action_space

        :param env: (GymEnv)
        :param observation_space: (gym.spaces.Space)
        :param action_space: (gym.spaces.Space)
        """
        if (observation_space != env.observation_space
            # Special cases for images that need to be transposed
            and not (is_image_space(env.observation_space)
                     and observation_space == VecTransposeImage.transpose_space(env.observation_space))):
            raise ValueError(f'Observation spaces do not match: {observation_space} != {env.observation_space}')
        if action_space != env.action_space:
            raise ValueError(f'Action spaces do not match: {action_space} != {env.action_space}')
Code Example #13
    def _wrap_env(self, env: GymEnv) -> VecEnv:
        """
        Overrides _wrap_env from stable_baselines3.common.base_class.BaseAlgorithm:
        the DummyVecEnv is additionally wrapped in a VecNormalize environment
        to normalize rewards and observations.
        """
        if not isinstance(env, VecEnv):
            if self.verbose >= 1:
                print("Wrapping the env in a DummyVecEnv.")
            env = DummyVecEnv([lambda: env])
            # Automatically normalize the input features and reward
            # norm_obs/norm_reward: True if obs/reward should be normalized
            # clip_obs: Max absolute value for observation
            # clip_reward: Max absolute value for discounted reward
            # gamma: discount factor
            env = VecNormalize(env, training=True, norm_obs=True, norm_reward=True,
                               gamma=0.99)  # clip_obs=10., clip_reward=10.0,
        if is_image_space(env.observation_space) and not isinstance(env, VecTransposeImage):
            if self.verbose >= 1:
                print("Wrapping the env in a VecTransposeImage.")
            env = VecTransposeImage(env)
        return env
Code Example #14
    def __init__(self,
                 venv: VecEnv,
                 n_stack: int,
                 channels_order: Optional[str] = None):
        self.venv = venv
        self.n_stack = n_stack

        wrapped_obs_space = venv.observation_space
        assert isinstance(
            wrapped_obs_space, spaces.Box
        ), "VecFrameStack only works with gym.spaces.Box observation spaces"

        if channels_order is None:
            # Detect channel location automatically for images
            if is_image_space(wrapped_obs_space):
                self.channels_first = is_image_space_channels_first(
                    wrapped_obs_space)
            else:
                # Default behavior for non-image space, stack on the last axis
                self.channels_first = False
        else:
            assert channels_order in {
                "last", "first"
            }, "`channels_order` must be one of the following: 'last', 'first'"

            self.channels_first = channels_order == "first"

        # This includes the vec-env dimension (first)
        self.stack_dimension = 1 if self.channels_first else -1
        repeat_axis = 0 if self.channels_first else -1
        low = np.repeat(wrapped_obs_space.low, self.n_stack, axis=repeat_axis)
        high = np.repeat(wrapped_obs_space.high,
                         self.n_stack,
                         axis=repeat_axis)
        self.stackedobs = np.zeros((venv.num_envs, ) + low.shape, low.dtype)
        observation_space = spaces.Box(low=low,
                                       high=high,
                                       dtype=venv.observation_space.dtype)
        VecEnvWrapper.__init__(self, venv, observation_space=observation_space)
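
The shape arithmetic above is plain np.repeat on the space bounds; a standalone sketch with a hypothetical 84x84 RGB space and n_stack=4:

import numpy as np
from gym import spaces

# Channel-first space: frames stack along axis 0
chw = spaces.Box(low=0, high=255, shape=(3, 84, 84), dtype=np.uint8)
print(np.repeat(chw.low, 4, axis=0).shape)  # (12, 84, 84)

# Channel-last space: frames stack along the last axis
hwc = spaces.Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8)
print(np.repeat(hwc.low, 4, axis=-1).shape)  # (84, 84, 12)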
Code Example #15
    def compute_stacking(
        num_envs: int,
        n_stack: int,
        observation_space: spaces.Box,
        channels_order: Optional[str] = None,
    ) -> Tuple[bool, int, np.ndarray, int]:
        """
        Calculates the parameters in order to stack observations

        :param num_envs: Number of environments
        :param n_stack: The number of observations to stack
        :param observation_space: The observation space
        :param channels_order: The order of the channels
        :return: tuple of channels_first, stack_dimension, stackedobs, repeat_axis
        """
        channels_first = False
        if channels_order is None:
            # Detect channel location automatically for images
            if is_image_space(observation_space):
                channels_first = is_image_space_channels_first(
                    observation_space)
            else:
                # Default behavior for non-image space, stack on the last axis
                channels_first = False
        else:
            assert channels_order in {
                "last",
                "first",
            }, "`channels_order` must be one of the following: 'last', 'first'"

            channels_first = channels_order == "first"

        # This includes the vec-env dimension (first)
        stack_dimension = 1 if channels_first else -1
        repeat_axis = 0 if channels_first else -1
        low = np.repeat(observation_space.low, n_stack, axis=repeat_axis)
        stackedobs = np.zeros((num_envs, ) + low.shape, low.dtype)
        return channels_first, stack_dimension, stackedobs, repeat_axis
Code Example #16
def check_for_correct_spaces(env: GymEnv, observation_space: gym.spaces.Space, action_space: gym.spaces.Space) -> None:
    """
    Checks that the environment has same spaces as provided ones. Used by BaseAlgorithm to check if
    spaces match after loading the model with given env.
    Checked parameters:
    - observation_space
    - action_space

    :param env: Environment to check for valid spaces
    :param observation_space: Observation space to check against
    :param action_space: Action space to check against
    """
    if (
        observation_space != env.observation_space
        # Special cases for images that need to be transposed
        and not (
            is_image_space(env.observation_space)
            and observation_space == VecTransposeImage.transpose_space(env.observation_space)
        )
    ):
        raise ValueError(f"Observation spaces do not match: {observation_space} != {env.observation_space}")
    if action_space != env.action_space:
        raise ValueError(f"Action spaces do not match: {action_space} != {env.action_space}")
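
A usage sketch: the first call passes silently because the spaces match, while the mismatched action space raises:

import gym
from gym import spaces
from stable_baselines3.common.utils import check_for_correct_spaces

env = gym.make("CartPole-v1")
check_for_correct_spaces(env, env.observation_space, env.action_space)  # OK, no error
try:
    check_for_correct_spaces(env, env.observation_space, spaces.Discrete(3))
except ValueError as error:
    print(error)  # Action spaces do not match: Discrete(3) != Discrete(2)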
Code Example #17
    def __init__(self,
                 observation_space: gym.spaces.Dict,
                 cnn_output_dim: int = 256):
        # TODO we do not know features-dim here before going over all the items, so put something there. This is dirty!
        super(CombinedExtractor, self).__init__(observation_space,
                                                features_dim=1)

        extractors = {}

        total_concat_size = 0
        for key, subspace in observation_space.spaces.items():
            if is_image_space(subspace):
                extractors[key] = NatureCNN(subspace,
                                            features_dim=cnn_output_dim)
                total_concat_size += cnn_output_dim
            else:
                # The observation key is a vector, flatten it if needed
                extractors[key] = nn.Flatten()
                total_concat_size += get_flattened_obs_dim(subspace)

        self.extractors = nn.ModuleDict(extractors)

        # Update the features dim manually
        self._features_dim = total_concat_size
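
A usage sketch with a hypothetical Dict space; note the image sub-space is channel-first here, since CombinedExtractor normally sits behind VecTransposeImage:

import numpy as np
from gym import spaces
from stable_baselines3.common.torch_layers import CombinedExtractor

obs_space = spaces.Dict({
    "image": spaces.Box(low=0, high=255, shape=(3, 64, 64), dtype=np.uint8),
    "vector": spaces.Box(low=-1.0, high=1.0, shape=(5,), dtype=np.float32),
})
extractor = CombinedExtractor(obs_space, cnn_output_dim=256)
print(extractor.features_dim)  # 261 = 256 (NatureCNN output) + 5 (flattened vector)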
Code Example #18
    def predict(
        self,
        observation: np.ndarray,
        partner_idx: int = 0,
        state: Optional[np.ndarray] = None,
        mask: Optional[np.ndarray] = None,
        deterministic: bool = False,
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Get the policy action and state from an observation (and optional state).
        Includes sugar-coating to handle different observations (e.g. normalizing images).

        :param observation: (np.ndarray) the input observation
        :param partner_idx: (int) index of the partner policy to use
        :param state: (Optional[np.ndarray]) The last states (can be None, used in recurrent policies)
        :param mask: (Optional[np.ndarray]) The last masks (can be None, used in recurrent policies)
        :param deterministic: (bool) Whether or not to return deterministic actions.
        :return: (Tuple[np.ndarray, Optional[np.ndarray]]) the model's action and the next state
            (used in recurrent policies)
        """
        # TODO (GH/1): add support for RNN policies
        # if state is None:
        #     state = self.initial_state
        # if mask is None:
        #     mask = [False for _ in range(self.n_envs)]
        observation = np.array(observation)

        # Handle the different cases for images
        # as PyTorch use channel first format
        if is_image_space(self.observation_space) and not (
                observation.shape == self.observation_space.shape
                or observation.shape[1:] == self.observation_space.shape):
            # Try to re-order the channels
            transpose_obs = VecTransposeImage.transpose_image(observation)
            if (transpose_obs.shape == self.observation_space.shape or
                    transpose_obs.shape[1:] == self.observation_space.shape):
                observation = transpose_obs

        vectorized_env = is_vectorized_observation(observation,
                                                   self.observation_space)

        observation = observation.reshape((-1, ) +
                                          self.observation_space.shape)

        observation = th.as_tensor(observation).to(self.device)
        with th.no_grad():
            actions = self._predict(observation,
                                    partner_idx=partner_idx,
                                    deterministic=deterministic)
        # Convert to numpy
        actions = actions.cpu().numpy()

        if isinstance(self.action_space, gym.spaces.Box):
            if self.squash_output:
                # Rescale to proper domain when using squashing
                actions = self.unscale_action(actions)
            else:
                # Actions could be on arbitrary scale, so clip the actions to avoid
                # out of bound error (e.g. if sampling from a Gaussian distribution)
                actions = np.clip(actions, self.action_space.low,
                                  self.action_space.high)

        if not vectorized_env:
            if state is not None:
                raise ValueError(
                    "Error: The environment must be vectorized when using recurrent policies."
                )
            actions = actions[0]

        return actions, state
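
VecTransposeImage.transpose_image, used above to re-order channels, handles both a single frame and a batch; a quick sketch with illustrative shapes:

import numpy as np
from stable_baselines3.common.vec_env import VecTransposeImage

frame = np.zeros((84, 84, 3), dtype=np.uint8)
print(VecTransposeImage.transpose_image(frame).shape)  # (3, 84, 84)
batch = np.zeros((8, 84, 84, 3), dtype=np.uint8)
print(VecTransposeImage.transpose_image(batch).shape)  # (8, 3, 84, 84)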
Code Example #19
    def __init__(self,
                 observation_space: gym.Space,
                 obs_unwrapper_function: Callable,
                 obs_space_dict: Dict[str, gym.Space],
                 normalize_images: bool,
                 features_dim: int = 20,
                 cnn_extractor_class: BaseFeaturesExtractor = NatureCNN,
                 cnn_feature_dim: int = 12,
                 mlp_net_arch: Iterable = (4, ),
                 mlp_feature_dim: int = 6,
                 embedding_dim: int = 6):
        super().__init__(observation_space, obs_unwrapper_function,
                         obs_space_dict, normalize_images, features_dim)

        # This gets the string obs space names associated with each extractor;
        # they're stored as nested strings separated by self.split_chars ("__")
        self.split_chars = "__"
        self.inferred_extractor_mapping = self.recursive_space_infer(
            obs_space_dict)
        self.cnn_spaces = self.inferred_extractor_mapping['CNN']
        self.mlp_spaces = self.inferred_extractor_mapping['MLP']
        self.embed_spaces = self.inferred_extractor_mapping['EMBED']

        _cnn_extractors = []
        total_flattened_dim = 0

        # Create CNN extractors
        for space_designation in self.cnn_spaces:
            cnn_space = recursive_lookup_from_string(obs_space_dict,
                                                     space_designation,
                                                     self.split_chars)
            assert is_image_space(cnn_space)
            _cnn_extractors.append(
                cnn_extractor_class(cnn_space, cnn_feature_dim))
            total_flattened_dim += cnn_feature_dim
        self.cnn_extractors = nn.ModuleList(_cnn_extractors)

        # Create MLP Extractor
        total_mlp_dim = 0
        if len(self.mlp_spaces) > 0:
            for space_designation in self.mlp_spaces:
                mlp_space = recursive_lookup_from_string(
                    obs_space_dict, space_designation, self.split_chars)
                assert isinstance(mlp_space, gym.spaces.Box)
                # assume if the space is multi-dimensional, we'll flatten it
                # before sending it to a MLP
                n_dim = int(np.prod(mlp_space.shape))
                total_mlp_dim += n_dim
            self.mlp_extractor = nn.Sequential(
                *create_mlp(total_mlp_dim, mlp_feature_dim, mlp_net_arch))
            total_flattened_dim += mlp_feature_dim
        else:
            self.mlp_extractor = None

        # Create Embed tables
        if len(self.embed_spaces) > 0:
            _embedding_tables = []
            for space_designation in self.embed_spaces:
                embed_space = recursive_lookup_from_string(
                    obs_space_dict, space_designation, self.split_chars)
                assert isinstance(embed_space, gym.spaces.Discrete)
                space_n = embed_space.n
                _embedding_tables.append(
                    nn.Embedding(embedding_dim=embedding_dim,
                                 num_embeddings=space_n))
                total_flattened_dim += embedding_dim

            self.embedding_tables = nn.ModuleList(_embedding_tables)
        else:
            self.embedding_tables = None
        self.projection_layer = nn.Linear(total_flattened_dim, features_dim)
Code Example #20
    def create_envs(self,
                    n_envs: int,
                    eval_env: bool = False,
                    no_log: bool = False) -> VecEnv:
        """
        Create the environment and wrap it if necessary.

        :param n_envs:
        :param eval_env: Whether it is an environment used for evaluation or not
        :param no_log: Do not log training when doing hyperparameter optim
            (issue with writing the same file)
        :return: the vectorized environment, with appropriate wrappers
        """
        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env or no_log else self.save_path

        monitor_kwargs = {}
        # Special case for GoalEnvs: log success rate too
        if "Neck" in self.env_id or self.is_robotics_env(
                self.env_id) or "parking-v0" in self.env_id:
            monitor_kwargs = dict(info_keywords=("is_success", ))

        # Note: made custom to support Gazebo Runtime wrapping
        def make_env():
            def _init():
                env = self.env_wrapper(env=self.env_id, **self.env_kwargs)
                env.seed(self.seed)
                env.action_space.seed(self.seed)

                if log_dir is not None:
                    os.makedirs(log_dir, exist_ok=True)
                env = Monitor(env, filename=log_dir, **monitor_kwargs)
                return env

            return _init

        if self.vec_env_class is None:
            self.vec_env_class = DummyVecEnv
        env = self.vec_env_class([make_env()], **self.vec_env_kwargs)

        # Wrap the env into a VecNormalize wrapper if needed
        # and load saved statistics when present
        env = self._maybe_normalize(env, eval_env)

        # Optional Frame-stacking
        if self.frame_stack is not None:
            n_stack = self.frame_stack
            env = VecFrameStack(env, n_stack)
            if self.verbose > 0:
                print(f"Stacking {n_stack} frames")

        # Wrap if needed to re-order channels
        # (switch from channel last to channel first convention)
        if is_image_space(
                env.observation_space) and not is_image_space_channels_first(
                    env.observation_space):
            if self.verbose > 0:
                print("Wrapping into a VecTransposeImage")
            env = VecTransposeImage(env)

        # check if wrapper for dict support is needed
        if self.algo == "her":
            if self.verbose > 0:
                print("Wrapping into a ObsDictWrapper")
            env = ObsDictWrapper(env)

        return env
Code Example #21
    def predict(
        self,
        observation: Union[np.ndarray, Dict[str, np.ndarray]],
        state: Optional[np.ndarray] = None,
        mask: Optional[np.ndarray] = None,
        deterministic: bool = False,
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Get the policy action and state from an observation (and optional state).
        Includes sugar-coating to handle different observations (e.g. normalizing images).

        :param observation: the input observation
        :param state: The last states (can be None, used in recurrent policies)
        :param mask: The last masks (can be None, used in recurrent policies)
        :param deterministic: Whether or not to return deterministic actions.
        :return: the model's action and the next state
            (used in recurrent policies)
        """
        # TODO (GH/1): add support for RNN policies
        # if state is None:
        #     state = self.initial_state
        # if mask is None:
        #     mask = [False for _ in range(self.n_envs)]
        # Switch to eval mode (this affects batch norm / dropout)
        self.eval()

        vectorized_env = False
        if isinstance(observation, dict):
            # need to copy the dict as the dict in VecFrameStack will become a torch tensor
            observation = copy.deepcopy(observation)
            for key, obs in observation.items():
                obs_space = self.observation_space.spaces[key]
                if is_image_space(obs_space):
                    obs_ = maybe_transpose(obs, obs_space)
                else:
                    obs_ = np.array(obs)
                vectorized_env = vectorized_env or is_vectorized_observation(obs_, obs_space)
                # Add batch dimension if needed
                observation[key] = obs_.reshape((-1,) + self.observation_space[key].shape)

        elif is_image_space(self.observation_space):
            # Handle the different cases for images
            # as PyTorch use channel first format
            observation = maybe_transpose(observation, self.observation_space)

        else:
            observation = np.array(observation)

        if not isinstance(observation, dict):
            # Dict obs need to be handled separately
            vectorized_env = is_vectorized_observation(observation, self.observation_space)
            # Add batch dimension if needed
            observation = observation.reshape((-1,) + self.observation_space.shape)

        observation = obs_as_tensor(observation, self.device)

        with th.no_grad():
            actions = self._predict(observation, deterministic=deterministic)
        # Convert to numpy
        actions = actions.cpu().numpy()

        if isinstance(self.action_space, gym.spaces.Box):
            if self.squash_output:
                # Rescale to proper domain when using squashing
                actions = self.unscale_action(actions)
            else:
                # Actions could be on arbitrary scale, so clip the actions to avoid
                # out of bound error (e.g. if sampling from a Gaussian distribution)
                actions = np.clip(actions, self.action_space.low, self.action_space.high)

        if not vectorized_env:
            if state is not None:
                raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
            actions = actions[0]

        return actions, state
Code Example #22
    def __init__(self, venv: VecEnv):
        assert is_image_space(venv.observation_space), "The observation space must be an image"

        observation_space = self.transpose_space(venv.observation_space)
        super(VecTransposeImage, self).__init__(venv, observation_space=observation_space)
Code Example #23
    def __init__(self, observation_space: gym.spaces.Box):
        super(Benchmark, self).__init__(observation_space, self.output_channels)

        assert is_image_space(observation_space), "This feature extraction policy must be used with image spaces."

        self._setup()
Code Example #24
def image_transpose(env):
    if is_image_space(env.observation_space) and not is_image_space_channels_first(
        env.observation_space
    ):
        env = VecTransposeImage(env)
    return env
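
A usage sketch with a hypothetical minimal image env (FakeImageEnv below is made up for illustration); after wrapping, the observation space is channel-first:

import gym
import numpy as np
from gym import spaces
from stable_baselines3.common.vec_env import DummyVecEnv

class FakeImageEnv(gym.Env):
    """Hypothetical env emitting channel-last 84x84 RGB frames."""
    def __init__(self):
        self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8)
        self.action_space = spaces.Discrete(2)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        return self.observation_space.sample(), 0.0, False, {}

venv = image_transpose(DummyVecEnv([FakeImageEnv]))
print(venv.observation_space.shape)  # (3, 84, 84)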