def compute_actions(self, observation, add_noise=False, update=True):
    """Run the sampler op on one observation and return the unbatched action.

    Args:
        observation: Raw observation; it is passed through
            ``self.preprocessor.transform`` and then, indexed with
            ``[None]``, through ``self.observation_filter``.
        add_noise: When truthy and the action space is a
            ``gym.spaces.Box``, Gaussian noise scaled by
            ``self.action_noise_std`` is added to the action in place.
        update: Forwarded to ``self.observation_filter`` as ``update=``.

    Returns:
        The result of ``unbatch_actions`` on the sampler output, with
        noise added when requested and applicable.
    """
    preprocessed = self.preprocessor.transform(observation)
    # `[None]` presumably prepends a batch axis of size one -- TODO confirm
    # against the preprocessor's output type.
    filtered = self.observation_filter(preprocessed[None], update=update)

    sampled = self.sess.run(self.sampler, feed_dict={self.inputs: filtered})
    action = unbatch_actions(sampled)

    # Noise only applies when requested and only for Box action spaces.
    if not add_noise:
        return action
    if not isinstance(self.action_space, gym.spaces.Box):
        return action

    noise = np.random.randn(*action.shape) * self.action_noise_std
    action += noise
    return action
def compute_actions(self, observation, add_noise=False, update=True):
    """Run the sampler op on one observation and return unbatched actions.

    Args:
        observation: Raw observation; it is passed through
            ``self.preprocessor.transform`` and then, indexed with
            ``[None]``, through ``self.observation_filter``.
        add_noise: When truthy, ``self._add_noise`` is mapped over the
            sampled action structure together with
            ``self.action_space_struct``.
        update: Forwarded to ``self.observation_filter`` as ``update=``.

    Returns:
        The result of ``unbatch_actions`` applied to the (optionally
        noised) sampler output.
    """
    preprocessed = self.preprocessor.transform(observation)
    filtered = self.observation_filter(preprocessed[None], update=update)

    # Sampler output; presumably a structure of per-component batches --
    # TODO confirm against the sampler op definition.
    batched = self.sess.run(self.sampler, feed_dict={self.inputs: filtered})

    if add_noise:
        # Pair each sampled component with its action-space counterpart.
        batched = tree.map_structure(
            self._add_noise, batched, self.action_space_struct)

    # Turn the batched structure into per-step actions.
    return unbatch_actions(batched)
def _compute_actions(policy, obs_batch, add_noise=False, update=True):
    """Sample an action from ``policy``'s model for the given observation.

    Args:
        policy: Object providing ``preprocessor``, ``observation_filter``,
            ``model``, ``dist_class``, ``action_space`` and
            ``action_noise_std``.
        obs_batch: Raw observation; it is passed through
            ``policy.preprocessor.transform`` and then, indexed with
            ``[None]``, through ``policy.observation_filter``.
        add_noise: When truthy and the action space is a
            ``gym.spaces.Box``, Gaussian noise scaled by
            ``policy.action_noise_std`` is added to the action in place.
        update: Forwarded to ``policy.observation_filter`` as ``update=``.

    Returns:
        The result of ``unbatch_actions`` on the sampled action, with
        noise added when requested and applicable.
    """
    observation = policy.preprocessor.transform(obs_batch)
    observation = policy.observation_filter(observation[None], update=update)
    observation = convert_to_torch_tensor(observation)

    # The model's second return value is not used here.
    dist_inputs, _ = policy.model({SampleBatch.CUR_OBS: observation}, [], None)
    dist = policy.dist_class(dist_inputs, policy.model)

    # `.numpy()` only works on CPU tensors; `.cpu()` is a no-op when the
    # sample already lives on the CPU and avoids a failure when it does not.
    action = dist.sample().detach().cpu().numpy()
    action = unbatch_actions(action)

    # Noise only applies when requested and only for Box action spaces.
    if add_noise and isinstance(policy.action_space, gym.spaces.Box):
        action += np.random.randn(*action.shape) * policy.action_noise_std
    return action