Code example #1
    def obs_to_tensor(
        self, observation: Union[np.ndarray, Dict[str, np.ndarray]]
    ) -> Tuple[th.Tensor, bool]:
        """
        Convert an input observation to a PyTorch tensor that can be fed to a model.
        Includes sugar-coating to handle different observations (e.g. normalizing images).

        :param observation: the input observation
        :return: The observation as PyTorch tensor
            and whether the observation is vectorized or not
        """
        vectorized_env = False
        if isinstance(observation, dict):
            # need to copy the dict as the dict in VecFrameStack will become a torch tensor
            observation = copy.deepcopy(observation)
            for key, obs in observation.items():
                obs_space = self.observation_space.spaces[key]
                if is_image_space(obs_space):
                    obs_ = maybe_transpose(obs, obs_space)
                else:
                    obs_ = np.array(obs)
                vectorized_env = vectorized_env or is_vectorized_observation(
                    obs_, obs_space)
                # Add batch dimension if needed; "scene_graph" observations
                # are left as-is (they may not have a fixed shape to reshape to)
                if key != "scene_graph":
                    observation[key] = obs_.reshape(
                        (-1, ) + self.observation_space[key].shape)
                else:
                    observation[key] = obs_

        elif is_image_space(self.observation_space):
            # Handle the different cases for images
            # as PyTorch use channel first format
            observation = maybe_transpose(observation, self.observation_space)

        else:
            observation = np.array(observation)

        if not isinstance(observation, dict):
            # Dict obs need to be handled separately
            vectorized_env = is_vectorized_observation(observation,
                                                       self.observation_space)
            # Add batch dimension if needed
            observation = observation.reshape((-1, ) +
                                              self.observation_space.shape)

        observation = obs_as_tensor(observation, self.device)
        return observation, vectorized_env
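
A note on the pattern above: the `(-1,) + shape` reshape adds a batch dimension to a single observation and leaves an already-batched one unchanged. A minimal numpy sketch of that behavior (the space and observations below are made up for illustration):

import numpy as np
from gym import spaces

# Hypothetical observation space, for illustration only
obs_space = spaces.Box(low=-1.0, high=1.0, shape=(4,))

single_obs = np.zeros(obs_space.shape)          # shape (4,)
batched_obs = np.zeros((8,) + obs_space.shape)  # shape (8, 4)

# The (-1,) reshape adds a batch dimension only when one is missing
print(single_obs.reshape((-1,) + obs_space.shape).shape)   # (1, 4)
print(batched_obs.reshape((-1,) + obs_space.shape).shape)  # (8, 4)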
Code example #2
    def predict(self,
                observation: np.ndarray,
                state: Optional[np.ndarray] = None,
                mask: Optional[np.ndarray] = None,
                deterministic: bool = False,
                use_behav=None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Overrides the base_class predict function to include epsilon-greedy exploration.

        :param observation: the input observation
        :param state: The last states (can be None, used in recurrent policies)
        :param mask: The last masks (can be None, used in recurrent policies)
        :param deterministic: Whether or not to return deterministic actions.
        :param use_behav: unused in this snippet
        :return: the model's action and the next state
            (used in recurrent policies)
        """
        if not deterministic and np.random.rand() < self.exploration_rate:
            if is_vectorized_observation(
                    maybe_transpose(observation, self.observation_space),
                    self.observation_space):
                n_batch = observation.shape[0]
                action = np.array(
                    [self.action_space.sample() for _ in range(n_batch)])
            else:
                action = np.array(self.action_space.sample())
        else:
            action, state = self.policy.predict(observation, state, mask,
                                                deterministic)
        return action, state
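
The epsilon-greedy branch above samples one random action per environment when the observation is vectorized. A standalone sketch of that sampling step (the action space, batch size, and exploration rate here are hypothetical):

import numpy as np
from gym import spaces

action_space = spaces.Discrete(4)  # hypothetical action space
n_batch = 3                        # hypothetical number of parallel envs
exploration_rate = 0.1

if np.random.rand() < exploration_rate:
    # One independent random action per environment
    action = np.array([action_space.sample() for _ in range(n_batch)])
else:
    # Stand-in for the greedy branch (self.policy.predict in the example above)
    action = np.zeros(n_batch, dtype=np.int64)
print(action.shape)  # (3,)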
Code example #3
    def predict(self, observation: np.ndarray,
                state: Optional[np.ndarray] = None,
                mask: Optional[np.ndarray] = None,
                deterministic: bool = False) -> Tuple[np.ndarray, Optional[np.ndarray], int]:
        """
        Overrides the base_class predict function to include epsilon-greedy exploration.

        :param observation: (np.ndarray) the input observation
        :param state: (Optional[np.ndarray]) The last states (can be None, used in recurrent policies)
        :param mask: (Optional[np.ndarray]) The last masks (can be None, used in recurrent policies)
        :param deterministic: (bool) Whether or not to return deterministic actions.
        :return: (Tuple[np.ndarray, Optional[np.ndarray], int]) the model's action, the next state
            (used in recurrent policies), and 'is_random_action' (0 or 1), indicating whether the action taken was random
        """
        if not deterministic and np.random.rand() < self.exploration_rate:
            # Choose a random action: one per environment if vectorized,
            # a single unbatched action otherwise
            vectorized_env = is_vectorized_observation(observation, self.policy.observation_space)
            if vectorized_env:
                n_batch = observation.shape[0]
                action = np.array([self.action_space.sample() for _ in range(n_batch)])
            else:
                action = np.array(self.action_space.sample())
            is_random_action = 1
        else:
            action, state = self.policy.predict(observation, state, mask, deterministic)
            is_random_action = 0
        return action, state, is_random_action
Code example #4
File: policies.py  Project: sateeshs/drl_grasping
    def predict(
        self,
        observation: np.ndarray,
        state: Optional[np.ndarray] = None,
        mask: Optional[np.ndarray] = None,
        deterministic: bool = False,
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Overridden to create a proper Octree batch.
        Get the policy action and state from an observation (and optional state).

        :param observation: the input observation
        :param state: The last states (can be None, used in recurrent policies)
        :param mask: The last masks (can be None, used in recurrent policies)
        :param deterministic: Whether or not to return deterministic actions.
        :return: the model's action and the next state
            (used in recurrent policies)
        """
        if isinstance(observation, dict):
            observation = ObsDictWrapper.convert_dict(observation)
        else:
            observation = np.array(observation)

        vectorized_env = is_vectorized_observation(observation,
                                                   self.observation_space)

        if self._debug_write_octree:
            ocnn.write_octree(th.from_numpy(observation[-1]), 'octree.octree')

        # Make batch out of tensor (consisting of n-stacked octrees)
        octree_batch = preprocess_stacked_octree_batch(
            observation,
            self.device,
            separate_batches=self._separate_networks_for_stacks)

        with th.no_grad():
            actions = self._predict(octree_batch, deterministic=deterministic)
        # Convert to numpy
        actions = actions.cpu().numpy()

        if isinstance(self.action_space, gym.spaces.Box):
            if self.squash_output:
                # Rescale to proper domain when using squashing
                actions = self.unscale_action(actions)
            else:
                # Actions could be on arbitrary scale, so clip the actions to avoid
                # out of bound error (e.g. if sampling from a Gaussian distribution)
                actions = np.clip(actions, self.action_space.low,
                                  self.action_space.high)

        if not vectorized_env:
            if state is not None:
                raise ValueError(
                    "Error: The environment must be vectorized when using recurrent policies."
                )
            actions = actions[0]

        return actions, state
Code example #5
    def predict(
        self,
        observation: np.ndarray,
        state: Optional[np.ndarray] = None,
        mask: Optional[np.ndarray] = None,
        deterministic: bool = False,
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Get the policy action and state from an observation (and optional state).
        Includes sugar-coating to handle different observations (e.g. normalizing images).

        :param observation: the input observation
        :param state: The last states (can be None, used in recurrent policies)
        :param mask: The last masks (can be None, used in recurrent policies)
        :param deterministic: Whether or not to return deterministic actions.
        :return: the model's action and the next state
            (used in recurrent policies)
        """
        # TODO (GH/1): add support for RNN policies
        # if state is None:
        #     state = self.initial_state
        # if mask is None:
        #     mask = [False for _ in range(self.n_envs)]
        if isinstance(observation, dict):
            observation = ObsDictWrapper.convert_dict(observation)
        else:
            observation = np.array(observation)

        # Handle the different cases for images
        # as PyTorch use channel first format
        observation = maybe_transpose(observation, self.observation_space)

        vectorized_env = is_vectorized_observation(observation, self.observation_space)

        observation = observation.reshape((-1,) + self.observation_space.shape)

        observation = th.as_tensor(observation).to(self.device)
        with th.no_grad():
            actions = self._predict(observation, deterministic=deterministic)
        # Convert to numpy
        actions = actions.cpu().numpy()

        if isinstance(self.action_space, gym.spaces.Box):
            if self.squash_output:
                # Rescale to proper domain when using squashing
                actions = self.unscale_action(actions)
            else:
                # Actions could be on arbitrary scale, so clip the actions to avoid
                # out of bound error (e.g. if sampling from a Gaussian distribution)
                actions = np.clip(actions, self.action_space.low, self.action_space.high)

        if not vectorized_env:
            if state is not None:
                raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
            actions = actions[0]

        return actions, state
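
For Box action spaces the example above either unscales (when the policy squashes with tanh) or clips raw actions. A minimal sketch of the clipping branch, with a made-up action space and sample:

import numpy as np
from gym import spaces

# Hypothetical Box action space, for illustration only
action_space = spaces.Box(low=np.array([-2.0, 0.0]), high=np.array([2.0, 1.0]))

# E.g. a Gaussian sample that falls outside the bounds
raw_actions = np.array([[3.5, -0.2]])

# Without squashing, actions are clipped to the space bounds
clipped = np.clip(raw_actions, action_space.low, action_space.high)
print(clipped)  # [[2. 0.]]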
Code example #6
    def preprocesses_obs_for_model(
            self,
            observation: Union[np.ndarray, Dict[str, np.ndarray]]
    ) -> Tuple[Union[np.ndarray, Dict[str, np.ndarray]], bool]:
        """
        Preporcesses obs both for prediction and evaluate action

        :param observation
        :return: Observation as PyTorch tensor
        :return:
        """
        vectorized_env = False
        if isinstance(observation, dict):
            # need to copy the dict as the dict in VecFrameStack will become a torch tensor
            observation = copy.deepcopy(observation)
            for key, obs in observation.items():
                obs_space = self.observation_space.spaces[key]
                if is_image_space(obs_space):
                    obs_ = maybe_transpose(obs, obs_space)
                else:
                    obs_ = np.array(obs)
                vectorized_env = vectorized_env or is_vectorized_observation(obs_, obs_space)
                # Add batch dimension if needed
                observation[key] = obs_.reshape((-1,) + self.observation_space[key].shape)

        elif is_image_space(self.observation_space):
            # Handle the different cases for images
            # as PyTorch use channel first format
            observation = maybe_transpose(observation, self.observation_space)

        else:
            observation = np.array(observation)

        if not isinstance(observation, dict):
            # Dict obs need to be handled separately
            vectorized_env = is_vectorized_observation(observation, self.observation_space)
            # Add batch dimension if needed
            observation = observation.reshape((-1,) + self.observation_space.shape)

        return observation, vectorized_env
Code example #7
    def predict(
        self,
        observation: Union[np.ndarray, Dict[str, np.ndarray]],
        state: Optional[np.ndarray] = None,
        mask: Optional[np.ndarray] = None,
        deterministic: bool = False,
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Get the policy action and state from an observation (and optional state).
        Includes sugar-coating to handle different observations (e.g. normalizing images).

        :param observation: the input observation
        :param state: The last states (can be None, used in recurrent policies)
        :param mask: The last masks (can be None, used in recurrent policies)
        :param deterministic: Whether or not to return deterministic actions.
        :return: the model's action and the next state
            (used in recurrent policies)
        """
        # TODO (GH/1): add support for RNN policies
        # if state is None:
        #     state = self.initial_state
        # if mask is None:
        #     mask = [False for _ in range(self.n_envs)]
        # Switch to eval mode (this affects batch norm / dropout)
        self.eval()

        vectorized_env = False
        if isinstance(observation, dict):
            # need to copy the dict as the dict in VecFrameStack will become a torch tensor
            observation = copy.deepcopy(observation)
            for key, obs in observation.items():
                obs_space = self.observation_space.spaces[key]
                if is_image_space(obs_space):
                    obs_ = maybe_transpose(obs, obs_space)
                else:
                    obs_ = np.array(obs)
                vectorized_env = vectorized_env or is_vectorized_observation(obs_, obs_space)
                # Add batch dimension if needed
                observation[key] = obs_.reshape((-1,) + self.observation_space[key].shape)

        elif is_image_space(self.observation_space):
            # Handle the different cases for images
            # as PyTorch use channel first format
            observation = maybe_transpose(observation, self.observation_space)

        else:
            observation = np.array(observation)

        if not isinstance(observation, dict):
            # Dict obs need to be handled separately
            vectorized_env = is_vectorized_observation(observation, self.observation_space)
            # Add batch dimension if needed
            observation = observation.reshape((-1,) + self.observation_space.shape)

        observation = obs_as_tensor(observation, self.device)

        with th.no_grad():
            actions = self._predict(observation, deterministic=deterministic)
        # Convert to numpy
        actions = actions.cpu().numpy()

        if isinstance(self.action_space, gym.spaces.Box):
            if self.squash_output:
                # Rescale to proper domain when using squashing
                actions = self.unscale_action(actions)
            else:
                # Actions could be on arbitrary scale, so clip the actions to avoid
                # out of bound error (e.g. if sampling from a Gaussian distribution)
                actions = np.clip(actions, self.action_space.low, self.action_space.high)

        if not vectorized_env:
            if state is not None:
                raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
            actions = actions[0]

        return actions, state
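
The "channel first" comments in these examples refer to PyTorch convolutions expecting (C, H, W) images, while many environments return (H, W, C). A plain numpy sketch of the transposition that maybe_transpose performs when needed (the image shape is made up):

import numpy as np

# Hypothetical HWC image (height, width, channels), as many envs return
hwc_image = np.zeros((84, 84, 3), dtype=np.uint8)

# PyTorch convolutions expect channel-first (C, H, W)
chw_image = np.transpose(hwc_image, (2, 0, 1))
print(chw_image.shape)  # (3, 84, 84)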
Code example #8
def test_is_vectorized_observation():
    # All vectorized
    box_space = spaces.Box(-1, 1, shape=(2, ))
    box_obs = np.ones((1, ) + box_space.shape)
    assert is_vectorized_observation(box_obs, box_space)

    discrete_space = spaces.Discrete(2)
    discrete_obs = np.ones((3, ), dtype=np.int8)
    assert is_vectorized_observation(discrete_obs, discrete_space)

    multidiscrete_space = spaces.MultiDiscrete([2, 3])
    multidiscrete_obs = np.ones((1, 2), dtype=np.int8)
    assert is_vectorized_observation(multidiscrete_obs, multidiscrete_space)

    multibinary_space = spaces.MultiBinary(3)
    multibinary_obs = np.ones((1, 3), dtype=np.int8)
    assert is_vectorized_observation(multibinary_obs, multibinary_space)

    dict_space = spaces.Dict({"box": box_space, "discrete": discrete_space})
    dict_obs = {"box": box_obs, "discrete": discrete_obs}
    assert is_vectorized_observation(dict_obs, dict_space)

    # All not vectorized
    box_obs = np.ones(box_space.shape)
    assert not is_vectorized_observation(box_obs, box_space)

    discrete_obs = np.ones((), dtype=np.int8)
    assert not is_vectorized_observation(discrete_obs, discrete_space)

    multidiscrete_obs = np.ones((2, ), dtype=np.int8)
    assert not is_vectorized_observation(multidiscrete_obs,
                                         multidiscrete_space)

    multibinary_obs = np.ones((3, ), dtype=np.int8)
    assert not is_vectorized_observation(multibinary_obs, multibinary_space)

    dict_obs = {"box": box_obs, "discrete": discrete_obs}
    assert not is_vectorized_observation(dict_obs, dict_space)

    # A mix of vectorized and non-vectorized things
    with pytest.raises(ValueError):
        discrete_obs = np.ones((1, ), dtype=np.int8)
        dict_obs = {"box": box_obs, "discrete": discrete_obs}
        is_vectorized_observation(dict_obs, dict_space)

    # Vectorized with the wrong shape
    with pytest.raises(ValueError):
        discrete_obs = np.ones((1, ), dtype=np.int8)
        box_obs = np.ones((1, 2) + box_space.shape)
        dict_obs = {"box": box_obs, "discrete": discrete_obs}
        is_vectorized_observation(dict_obs, dict_space)

    # Weird shape: error
    with pytest.raises(ValueError):
        discrete_obs = np.ones((1, ) + box_space.shape, dtype=np.int8)
        is_vectorized_observation(discrete_obs, discrete_space)

    # wrong shape
    with pytest.raises(ValueError):
        multidiscrete_obs = np.ones((2, 1), dtype=np.int8)
        is_vectorized_observation(multidiscrete_obs, multidiscrete_space)

    # wrong shape
    with pytest.raises(ValueError):
        multibinary_obs = np.ones((2, 1), dtype=np.int8)
        is_vectorized_observation(multibinary_obs, multibinary_space)

    # Almost good shape: one dimension too much for Discrete obs
    with pytest.raises(ValueError):
        box_obs = np.ones((1, ) + box_space.shape)
        discrete_obs = np.ones((1, 1), dtype=np.int8)
        dict_obs = {"box": box_obs, "discrete": discrete_obs}
        is_vectorized_observation(dict_obs, dict_space)
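
Outside of tests, is_vectorized_observation is just a batched-or-not check. A short usage sketch, assuming the stable_baselines3.common.utils import path:

import numpy as np
from gym import spaces
from stable_baselines3.common.utils import is_vectorized_observation

box_space = spaces.Box(-1, 1, shape=(2,))

# A single observation vs. a batch of 4 observations
print(is_vectorized_observation(np.ones(box_space.shape), box_space))         # False
print(is_vectorized_observation(np.ones((4,) + box_space.shape), box_space))  # True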
Code example #9
File: policies.py  Project: wenry55/stable-baselines3
    def predict(
        self,
        observation: np.ndarray,
        state: Optional[np.ndarray] = None,
        mask: Optional[np.ndarray] = None,
        deterministic: bool = False
    ) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Get the policy action and state from an observation (and optional state).
        Includes sugar-coating to handle different observations (e.g. normalizing images).

        :param observation: (np.ndarray) the input observation
        :param state: (Optional[np.ndarray]) The last states (can be None, used in recurrent policies)
        :param mask: (Optional[np.ndarray]) The last masks (can be None, used in recurrent policies)
        :param deterministic: (bool) Whether or not to return deterministic actions.
        :return: (Tuple[np.ndarray, Optional[np.ndarray]]) the model's action and the next state
            (used in recurrent policies)
        """
        # if state is None:
        #     state = self.initial_state
        # if mask is None:
        #     mask = [False for _ in range(self.n_envs)]
        observation = np.array(observation)

        # Handle the different cases for images
        # as PyTorch use channel first format
        if is_image_space(self.observation_space):
            if (observation.shape == self.observation_space.shape
                    or observation.shape[1:] == self.observation_space.shape):
                pass
            else:
                # Try to re-order the channels
                transpose_obs = VecTransposeImage.transpose_image(observation)
                if (transpose_obs.shape == self.observation_space.shape
                        or transpose_obs.shape[1:]
                        == self.observation_space.shape):
                    observation = transpose_obs

        vectorized_env = is_vectorized_observation(observation,
                                                   self.observation_space)

        observation = observation.reshape((-1, ) +
                                          self.observation_space.shape)

        observation = th.as_tensor(observation).to(self.device)
        with th.no_grad():
            actions = self._predict(observation, deterministic=deterministic)
        # Convert to numpy
        actions = actions.cpu().numpy()

        # Rescale to proper domain when using squashing
        if isinstance(self.action_space,
                      gym.spaces.Box) and self.squash_output:
            actions = self.unscale_action(actions)

        clipped_actions = actions
        # Clip the actions to avoid out of bound error when using gaussian distribution
        if isinstance(self.action_space,
                      gym.spaces.Box) and not self.squash_output:
            clipped_actions = np.clip(actions, self.action_space.low,
                                      self.action_space.high)

        if not vectorized_env:
            if state is not None:
                raise ValueError(
                    "Error: The environment must be vectorized when using recurrent policies."
                )
            clipped_actions = clipped_actions[0]

        return clipped_actions, state
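
The unscale_action call above maps a tanh-squashed action in [-1, 1] back to the Box bounds. A standalone sketch of that mapping, assuming the linear rescaling used by stable-baselines3 (the space below is hypothetical):

import numpy as np
from gym import spaces

def unscale_action(action_space: spaces.Box, scaled_action: np.ndarray) -> np.ndarray:
    # Linearly map [-1, 1] to [low, high]
    low, high = action_space.low, action_space.high
    return low + 0.5 * (scaled_action + 1.0) * (high - low)

space = spaces.Box(low=np.array([0.0]), high=np.array([10.0]))  # hypothetical
print(unscale_action(space, np.array([-1.0])))  # [0.]
print(unscale_action(space, np.array([0.0])))   # [5.]
print(unscale_action(space, np.array([1.0])))   # [10.]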