def obs_to_tensor(
    self, observation: Union[np.ndarray, Dict[str, np.ndarray]]
) -> Tuple[th.Tensor, bool]:
    """
    Convert an input observation to a PyTorch tensor that can be fed to a model.
    Includes sugar-coating to handle different observations (e.g. normalizing images).

    :param observation: the input observation
    :return: The observation as PyTorch tensor
        and whether the observation is vectorized or not
    """
    vectorized_env = False
    if isinstance(observation, dict):
        # need to copy the dict as the dict in VecFrameStack will become a torch tensor
        observation = copy.deepcopy(observation)
        for key, obs in observation.items():
            obs_space = self.observation_space.spaces[key]
            if is_image_space(obs_space):
                obs_ = maybe_transpose(obs, obs_space)
            else:
                obs_ = np.array(obs)
            vectorized_env = vectorized_env or is_vectorized_observation(obs_, obs_space)
            # Add batch dimension if needed
            if key != "scene_graph":
                observation[key] = obs_.reshape((-1,) + self.observation_space[key].shape)
            else:
                observation[key] = obs_

    elif is_image_space(self.observation_space):
        # Handle the different cases for images
        # as PyTorch uses the channel-first format
        observation = maybe_transpose(observation, self.observation_space)

    else:
        observation = np.array(observation)

    if not isinstance(observation, dict):
        # Dict obs need to be handled separately
        vectorized_env = is_vectorized_observation(observation, self.observation_space)
        # Add batch dimension if needed
        observation = observation.reshape((-1,) + self.observation_space.shape)

    observation = obs_as_tensor(observation, self.device)
    return observation, vectorized_env
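# A minimal, standalone sketch of the batching/tensor step above, assuming a
# plain Box observation space (no dict or image handling). All names below
# are illustrative, not part of the class.
import numpy as np
import torch as th
from gym import spaces

obs_space = spaces.Box(low=-1.0, high=1.0, shape=(4,))
obs = np.zeros(obs_space.shape, dtype=np.float32)  # single, non-vectorized observation

# Add the batch dimension exactly as the non-dict branch does
batched = obs.reshape((-1,) + obs_space.shape)  # shape (1, 4)
tensor = th.as_tensor(batched)  # ready to feed to a model
print(tensor.shape)  # torch.Size([1, 4])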
def predict(self,
            observation: np.ndarray,
            state: Optional[np.ndarray] = None,
            mask: Optional[np.ndarray] = None,
            deterministic: bool = False,
            use_behav=None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Overrides the base_class predict function to include epsilon-greedy exploration.

    :param observation: the input observation
    :param state: The last states (can be None, used in recurrent policies)
    :param mask: The last masks (can be None, used in recurrent policies)
    :param deterministic: Whether or not to return deterministic actions.
    :param use_behav: (unused in this implementation)
    :return: the model's action and the next state
        (used in recurrent policies)
    """
    if not deterministic and np.random.rand() < self.exploration_rate:
        if is_vectorized_observation(maybe_transpose(observation, self.observation_space),
                                     self.observation_space):
            # One random action per observation in the batch
            n_batch = observation.shape[0]
            action = np.array([self.action_space.sample() for _ in range(n_batch)])
        else:
            action = np.array(self.action_space.sample())
    else:
        action, state = self.policy.predict(observation, state, mask, deterministic)
    return action, state
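# A standalone sketch of the exploration branch above, assuming a Discrete
# action space and a batch of observations from a vectorized env; the zeros
# stand in for the greedy policy output.
import numpy as np
from gym import spaces

action_space = spaces.Discrete(4)
exploration_rate = 0.1
observation = np.zeros((8, 5), dtype=np.float32)  # batch of 8 observations

if np.random.rand() < exploration_rate:
    # One uniformly random action per environment in the batch
    n_batch = observation.shape[0]
    action = np.array([action_space.sample() for _ in range(n_batch)])
else:
    action = np.zeros(observation.shape[0], dtype=np.int64)  # greedy stand-in
print(action.shape)  # (8,)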
def predict(self,
            observation: np.ndarray,
            state: Optional[np.ndarray] = None,
            mask: Optional[np.ndarray] = None,
            deterministic: bool = False) -> Tuple[np.ndarray, Optional[np.ndarray], int]:
    """
    Overrides the base_class predict function to include epsilon-greedy exploration.

    :param observation: (np.ndarray) the input observation
    :param state: (Optional[np.ndarray]) The last states (can be None, used in recurrent policies)
    :param mask: (Optional[np.ndarray]) The last masks (can be None, used in recurrent policies)
    :param deterministic: (bool) Whether or not to return deterministic actions.
    :return: (Tuple[np.ndarray, Optional[np.ndarray], int]) the model's action, the next state
        (used in recurrent policies) and 'is_random_action' (0 or 1), which indicates
        whether the action taken was random
    """
    if not deterministic and np.random.rand() < self.exploration_rate:
        # Choose a random action
        is_random_action = 1
        vectorized_env = is_vectorized_observation(observation, self.policy.observation_space)
        if vectorized_env:
            # One random action per observation in the batch
            n_batch = observation.shape[0]
            action = np.array([self.action_space.sample() for _ in range(n_batch)])
        else:
            action = np.array(self.action_space.sample())
    else:
        action, state = self.policy.predict(observation, state, mask, deterministic)
        is_random_action = 0
    return action, state, is_random_action
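# Hypothetical usage of the three-value predict above: tally how often the
# epsilon-greedy branch fired during a short rollout. `model` and `env` are
# assumed to exist (a trained agent and a Gym environment) and are not
# defined here.
random_steps = 0
obs = env.reset()
for step in range(100):
    action, _state, is_random_action = model.predict(obs, deterministic=False)
    random_steps += is_random_action
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()
print(f"random actions: {random_steps}/100")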
def predict(
    self,
    observation: np.ndarray,
    state: Optional[np.ndarray] = None,
    mask: Optional[np.ndarray] = None,
    deterministic: bool = False,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Overridden to create a proper Octree batch.
    Get the policy action and state from an observation (and optional state).

    :param observation: the input observation
    :param state: The last states (can be None, used in recurrent policies)
    :param mask: The last masks (can be None, used in recurrent policies)
    :param deterministic: Whether or not to return deterministic actions.
    :return: the model's action and the next state
        (used in recurrent policies)
    """
    if isinstance(observation, dict):
        observation = ObsDictWrapper.convert_dict(observation)
    else:
        observation = np.array(observation)

    vectorized_env = is_vectorized_observation(observation, self.observation_space)

    if self._debug_write_octree:
        ocnn.write_octree(th.from_numpy(observation[-1]), 'octree.octree')

    # Make batch out of tensor (consisting of n-stacked octrees)
    octree_batch = preprocess_stacked_octree_batch(
        observation, self.device,
        separate_batches=self._separate_networks_for_stacks)

    with th.no_grad():
        actions = self._predict(octree_batch, deterministic=deterministic)
    # Convert to numpy
    actions = actions.cpu().numpy()

    if isinstance(self.action_space, gym.spaces.Box):
        if self.squash_output:
            # Rescale to proper domain when using squashing
            actions = self.unscale_action(actions)
        else:
            # Actions could be on arbitrary scale, so clip the actions to avoid
            # out of bound error (e.g. if sampling from a Gaussian distribution)
            actions = np.clip(actions, self.action_space.low, self.action_space.high)

    if not vectorized_env:
        if state is not None:
            raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
        actions = actions[0]

    return actions, state
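# A minimal sketch of the Box action post-processing above. unscale_from_tanh
# is illustrative and mirrors what unscale_action does in stable-baselines3:
# map tanh-squashed actions from [-1, 1] back to [low, high].
import numpy as np
from gym import spaces

action_space = spaces.Box(low=-2.0, high=2.0, shape=(2,))

def unscale_from_tanh(scaled: np.ndarray) -> np.ndarray:
    low, high = action_space.low, action_space.high
    return low + 0.5 * (scaled + 1.0) * (high - low)

print(unscale_from_tanh(np.array([[0.5, -1.0]])))  # rescaled: [[ 1. -2.]]
# Without squashing, clip instead to stay inside the Box bounds
print(np.clip(np.array([[3.0, -5.0]]), action_space.low, action_space.high))  # [[ 2. -2.]]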
def predict(
    self,
    observation: np.ndarray,
    state: Optional[np.ndarray] = None,
    mask: Optional[np.ndarray] = None,
    deterministic: bool = False,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Get the policy action and state from an observation (and optional state).
    Includes sugar-coating to handle different observations (e.g. normalizing images).

    :param observation: the input observation
    :param state: The last states (can be None, used in recurrent policies)
    :param mask: The last masks (can be None, used in recurrent policies)
    :param deterministic: Whether or not to return deterministic actions.
    :return: the model's action and the next state
        (used in recurrent policies)
    """
    # TODO (GH/1): add support for RNN policies
    # if state is None:
    #     state = self.initial_state
    # if mask is None:
    #     mask = [False for _ in range(self.n_envs)]
    if isinstance(observation, dict):
        observation = ObsDictWrapper.convert_dict(observation)
    else:
        observation = np.array(observation)

    # Handle the different cases for images
    # as PyTorch uses the channel-first format
    observation = maybe_transpose(observation, self.observation_space)

    vectorized_env = is_vectorized_observation(observation, self.observation_space)

    observation = observation.reshape((-1,) + self.observation_space.shape)

    observation = th.as_tensor(observation).to(self.device)
    with th.no_grad():
        actions = self._predict(observation, deterministic=deterministic)
    # Convert to numpy
    actions = actions.cpu().numpy()

    if isinstance(self.action_space, gym.spaces.Box):
        if self.squash_output:
            # Rescale to proper domain when using squashing
            actions = self.unscale_action(actions)
        else:
            # Actions could be on arbitrary scale, so clip the actions to avoid
            # out of bound error (e.g. if sampling from a Gaussian distribution)
            actions = np.clip(actions, self.action_space.low, self.action_space.high)

    if not vectorized_env:
        if state is not None:
            raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
        actions = actions[0]

    return actions, state
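# A standalone sketch of the channel-first handling above, assuming an HWC
# image that must become CHW for PyTorch; np.transpose mirrors what
# VecTransposeImage.transpose_image does for a single image.
import numpy as np
from gym import spaces

obs_space = spaces.Box(low=0, high=255, shape=(3, 84, 84), dtype=np.uint8)  # CHW
obs = np.zeros((84, 84, 3), dtype=np.uint8)  # incoming HWC observation

if obs.shape != obs_space.shape:
    obs = np.transpose(obs, (2, 0, 1))  # re-order the channels to the front
print(obs.shape)  # (3, 84, 84)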
def preprocesses_obs_for_model(
    self, observation: Union[np.ndarray, Dict[str, np.ndarray]]
) -> Tuple[Union[np.ndarray, Dict[str, np.ndarray]], bool]:
    """
    Preprocesses the observation both for prediction and for evaluating actions.

    :param observation: the input observation
    :return: the preprocessed observation (with a batch dimension where needed)
        and whether the observation is vectorized or not
    """
    vectorized_env = False
    if isinstance(observation, dict):
        # need to copy the dict as the dict in VecFrameStack will become a torch tensor
        observation = copy.deepcopy(observation)
        for key, obs in observation.items():
            obs_space = self.observation_space.spaces[key]
            if is_image_space(obs_space):
                obs_ = maybe_transpose(obs, obs_space)
            else:
                obs_ = np.array(obs)
            vectorized_env = vectorized_env or is_vectorized_observation(obs_, obs_space)
            # Add batch dimension if needed
            observation[key] = obs_.reshape((-1,) + self.observation_space[key].shape)

    elif is_image_space(self.observation_space):
        # Handle the different cases for images
        # as PyTorch uses the channel-first format
        observation = maybe_transpose(observation, self.observation_space)

    else:
        observation = np.array(observation)

    if not isinstance(observation, dict):
        # Dict obs need to be handled separately
        vectorized_env = is_vectorized_observation(observation, self.observation_space)
        # Add batch dimension if needed
        observation = observation.reshape((-1,) + self.observation_space.shape)

    return observation, vectorized_env
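# A minimal sketch of the per-key batching above, assuming a Dict observation
# space with a vector entry and an image entry; shapes are illustrative.
import numpy as np
from gym import spaces

obs_space = spaces.Dict({
    "vec": spaces.Box(-1, 1, shape=(3,)),
    "img": spaces.Box(0, 255, shape=(3, 8, 8), dtype=np.uint8),
})
obs = {
    "vec": np.zeros(3, dtype=np.float32),
    "img": np.zeros((3, 8, 8), dtype=np.uint8),
}

# Add a batch dimension key by key, as the loop above does
batched = {key: np.array(value).reshape((-1,) + obs_space[key].shape)
           for key, value in obs.items()}
print({key: value.shape for key, value in batched.items()})
# {'vec': (1, 3), 'img': (1, 3, 8, 8)}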
def predict(
    self,
    observation: Union[np.ndarray, Dict[str, np.ndarray]],
    state: Optional[np.ndarray] = None,
    mask: Optional[np.ndarray] = None,
    deterministic: bool = False,
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Get the policy action and state from an observation (and optional state).
    Includes sugar-coating to handle different observations (e.g. normalizing images).

    :param observation: the input observation
    :param state: The last states (can be None, used in recurrent policies)
    :param mask: The last masks (can be None, used in recurrent policies)
    :param deterministic: Whether or not to return deterministic actions.
    :return: the model's action and the next state
        (used in recurrent policies)
    """
    # TODO (GH/1): add support for RNN policies
    # if state is None:
    #     state = self.initial_state
    # if mask is None:
    #     mask = [False for _ in range(self.n_envs)]
    # Switch to eval mode (this affects batch norm / dropout)
    self.eval()

    vectorized_env = False
    if isinstance(observation, dict):
        # need to copy the dict as the dict in VecFrameStack will become a torch tensor
        observation = copy.deepcopy(observation)
        for key, obs in observation.items():
            obs_space = self.observation_space.spaces[key]
            if is_image_space(obs_space):
                obs_ = maybe_transpose(obs, obs_space)
            else:
                obs_ = np.array(obs)
            vectorized_env = vectorized_env or is_vectorized_observation(obs_, obs_space)
            # Add batch dimension if needed
            observation[key] = obs_.reshape((-1,) + self.observation_space[key].shape)

    elif is_image_space(self.observation_space):
        # Handle the different cases for images
        # as PyTorch uses the channel-first format
        observation = maybe_transpose(observation, self.observation_space)

    else:
        observation = np.array(observation)

    if not isinstance(observation, dict):
        # Dict obs need to be handled separately
        vectorized_env = is_vectorized_observation(observation, self.observation_space)
        # Add batch dimension if needed
        observation = observation.reshape((-1,) + self.observation_space.shape)

    observation = obs_as_tensor(observation, self.device)

    with th.no_grad():
        actions = self._predict(observation, deterministic=deterministic)
    # Convert to numpy
    actions = actions.cpu().numpy()

    if isinstance(self.action_space, gym.spaces.Box):
        if self.squash_output:
            # Rescale to proper domain when using squashing
            actions = self.unscale_action(actions)
        else:
            # Actions could be on arbitrary scale, so clip the actions to avoid
            # out of bound error (e.g. if sampling from a Gaussian distribution)
            actions = np.clip(actions, self.action_space.low, self.action_space.high)

    if not vectorized_env:
        if state is not None:
            raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
        actions = actions[0]

    return actions, state
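# Hypothetical usage of the predict above with a dict observation; `model`
# and `env` are assumed to be an SB3-style agent and environment supporting
# Dict observation spaces, and are not defined here.
obs = env.reset()
action, _state = model.predict(obs, deterministic=True)
obs, reward, done, info = env.step(action)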
def test_is_vectorized_observation():
    # All vectorized
    box_space = spaces.Box(-1, 1, shape=(2,))
    box_obs = np.ones((1,) + box_space.shape)
    assert is_vectorized_observation(box_obs, box_space)

    discrete_space = spaces.Discrete(2)
    discrete_obs = np.ones((3,), dtype=np.int8)
    assert is_vectorized_observation(discrete_obs, discrete_space)

    multidiscrete_space = spaces.MultiDiscrete([2, 3])
    multidiscrete_obs = np.ones((1, 2), dtype=np.int8)
    assert is_vectorized_observation(multidiscrete_obs, multidiscrete_space)

    multibinary_space = spaces.MultiBinary(3)
    multibinary_obs = np.ones((1, 3), dtype=np.int8)
    assert is_vectorized_observation(multibinary_obs, multibinary_space)

    dict_space = spaces.Dict({"box": box_space, "discrete": discrete_space})
    dict_obs = {"box": box_obs, "discrete": discrete_obs}
    assert is_vectorized_observation(dict_obs, dict_space)

    # All not vectorized
    box_obs = np.ones(box_space.shape)
    assert not is_vectorized_observation(box_obs, box_space)

    discrete_obs = np.ones((), dtype=np.int8)
    assert not is_vectorized_observation(discrete_obs, discrete_space)

    multidiscrete_obs = np.ones((2,), dtype=np.int8)
    assert not is_vectorized_observation(multidiscrete_obs, multidiscrete_space)

    multibinary_obs = np.ones((3,), dtype=np.int8)
    assert not is_vectorized_observation(multibinary_obs, multibinary_space)

    dict_obs = {"box": box_obs, "discrete": discrete_obs}
    assert not is_vectorized_observation(dict_obs, dict_space)

    # A mix of vectorized and non-vectorized entries
    with pytest.raises(ValueError):
        discrete_obs = np.ones((1,), dtype=np.int8)
        dict_obs = {"box": box_obs, "discrete": discrete_obs}
        is_vectorized_observation(dict_obs, dict_space)

    # Vectorized with the wrong shape
    with pytest.raises(ValueError):
        discrete_obs = np.ones((1,), dtype=np.int8)
        box_obs = np.ones((1, 2) + box_space.shape)
        dict_obs = {"box": box_obs, "discrete": discrete_obs}
        is_vectorized_observation(dict_obs, dict_space)

    # Weird shape: error
    with pytest.raises(ValueError):
        discrete_obs = np.ones((1,) + box_space.shape, dtype=np.int8)
        is_vectorized_observation(discrete_obs, discrete_space)

    # Wrong shape
    with pytest.raises(ValueError):
        multidiscrete_obs = np.ones((2, 1), dtype=np.int8)
        is_vectorized_observation(multidiscrete_obs, multidiscrete_space)

    # Wrong shape
    with pytest.raises(ValueError):
        multibinary_obs = np.ones((2, 1), dtype=np.int8)
        is_vectorized_observation(multibinary_obs, multibinary_space)

    # Almost good shape: one dimension too many for a Discrete obs
    with pytest.raises(ValueError):
        box_obs = np.ones((1,) + box_space.shape)
        discrete_obs = np.ones((1, 1), dtype=np.int8)
        dict_obs = {"box": box_obs, "discrete": discrete_obs}
        is_vectorized_observation(dict_obs, dict_space)
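# The semantics the test exercises, in two lines: a leading batch axis marks
# an observation as vectorized (import path assumes stable-baselines3).
import numpy as np
from gym import spaces
from stable_baselines3.common.utils import is_vectorized_observation

space = spaces.Box(-1, 1, shape=(2,))
print(is_vectorized_observation(np.ones((1, 2)), space))  # True: batch of one
print(is_vectorized_observation(np.ones((2,)), space))    # False: single obs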
def predict(
    self,
    observation: np.ndarray,
    state: Optional[np.ndarray] = None,
    mask: Optional[np.ndarray] = None,
    deterministic: bool = False
) -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    Get the policy action and state from an observation (and optional state).
    Includes sugar-coating to handle different observations (e.g. normalizing images).

    :param observation: (np.ndarray) the input observation
    :param state: (Optional[np.ndarray]) The last states (can be None, used in recurrent policies)
    :param mask: (Optional[np.ndarray]) The last masks (can be None, used in recurrent policies)
    :param deterministic: (bool) Whether or not to return deterministic actions.
    :return: (Tuple[np.ndarray, Optional[np.ndarray]]) the model's action and the next state
        (used in recurrent policies)
    """
    # if state is None:
    #     state = self.initial_state
    # if mask is None:
    #     mask = [False for _ in range(self.n_envs)]
    observation = np.array(observation)

    # Handle the different cases for images
    # as PyTorch uses the channel-first format
    if is_image_space(self.observation_space):
        if not (observation.shape == self.observation_space.shape
                or observation.shape[1:] == self.observation_space.shape):
            # Try to re-order the channels
            transpose_obs = VecTransposeImage.transpose_image(observation)
            if (transpose_obs.shape == self.observation_space.shape
                    or transpose_obs.shape[1:] == self.observation_space.shape):
                observation = transpose_obs

    vectorized_env = is_vectorized_observation(observation, self.observation_space)

    observation = observation.reshape((-1,) + self.observation_space.shape)

    observation = th.as_tensor(observation).to(self.device)
    with th.no_grad():
        actions = self._predict(observation, deterministic=deterministic)
    # Convert to numpy
    actions = actions.cpu().numpy()

    # Rescale to proper domain when using squashing
    if isinstance(self.action_space, gym.spaces.Box) and self.squash_output:
        actions = self.unscale_action(actions)

    clipped_actions = actions
    # Clip the actions to avoid out of bound error when using a Gaussian distribution
    if isinstance(self.action_space, gym.spaces.Box) and not self.squash_output:
        clipped_actions = np.clip(actions, self.action_space.low, self.action_space.high)

    if not vectorized_env:
        if state is not None:
            raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
        clipped_actions = clipped_actions[0]

    return clipped_actions, state
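# A minimal sketch of the no-grad inference step above: run the network
# without gradient tracking and move the result back to numpy. The tiny
# linear layer is a stand-in for self._predict.
import numpy as np
import torch as th

net = th.nn.Linear(4, 2)  # stand-in policy network
observation = th.as_tensor(np.zeros((1, 4), dtype=np.float32))
with th.no_grad():
    actions = net(observation)
actions = actions.cpu().numpy()  # convert to numpy, as predict does
print(actions.shape)  # (1, 2)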