Example #1
0
 def compute_actions(self, observation, add_noise=False, update=True):
     """Sample an action for ``observation`` by running the TF sampler op.

     Args:
         observation: Raw observation handed to ``self.preprocessor``.
         add_noise: When truthy and the action space is a
             ``gym.spaces.Box``, Gaussian noise scaled by
             ``self.action_noise_std`` is added to the action in place.
         update: Forwarded to ``self.observation_filter``.

     Returns:
         The output of ``unbatch_actions`` applied to the sampled action,
         with noise added when requested.
     """
     preprocessed = self.preprocessor.transform(observation)
     # Indexing with ``None`` prepends an axis before filtering
     # (presumably the batch axis expected by the input placeholder).
     filtered = self.observation_filter(preprocessed[None], update=update)
     sampled = self.sess.run(
         self.sampler, feed_dict={self.inputs: filtered})
     result = unbatch_actions(sampled)

     # Noise only applies to Box action spaces.
     if not (add_noise and isinstance(self.action_space, gym.spaces.Box)):
         return result

     result += np.random.randn(*result.shape) * self.action_noise_std
     return result
Example #2
0
 def compute_actions(self, observation, add_noise=False, update=True):
     """Sample actions for ``observation`` by running the TF sampler op.

     Args:
         observation: Raw observation handed to ``self.preprocessor``.
         add_noise: When truthy, ``self._add_noise`` is mapped over the
             sampled action components together with
             ``self.action_space_struct``.
         update: Forwarded to ``self.observation_filter``.

     Returns:
         The output of ``unbatch_actions`` applied to the (optionally
         noised) sampled action batches.
     """
     transformed = self.preprocessor.transform(observation)
     # Indexing with ``None`` prepends an axis before filtering
     # (presumably the batch axis expected by the input placeholder).
     filtered = self.observation_filter(transformed[None], update=update)

     # The sampler yields a list of (component) batches.
     batched = self.sess.run(
         self.sampler, feed_dict={self.inputs: filtered})

     if add_noise:
         batched = tree.map_structure(
             self._add_noise, batched, self.action_space_struct)

     # Turn the batched components into a list of single actions, each
     # being a list of its action components.
     return unbatch_actions(batched)
Example #3
0
    def _compute_actions(policy, obs_batch, add_noise=False, update=True):
        """Sample actions for ``obs_batch`` from the policy's torch model.

        Args:
            policy: Object providing ``preprocessor``, ``observation_filter``,
                ``model``, ``dist_class``, ``action_space`` and
                ``action_noise_std``.
            obs_batch: Raw observation handed to ``policy.preprocessor``.
            add_noise: When truthy and the action space is a
                ``gym.spaces.Box``, Gaussian noise scaled by
                ``policy.action_noise_std`` is added to the action in place.
            update: Forwarded to ``policy.observation_filter``.

        Returns:
            The output of ``unbatch_actions`` applied to the sampled action,
            with noise added when requested.
        """
        observation = policy.preprocessor.transform(obs_batch)
        # Indexing with ``None`` prepends an axis before filtering
        # (presumably the batch axis expected by the model).
        observation = policy.observation_filter(
            observation[None], update=update)

        observation = convert_to_torch_tensor(observation)
        # Only the first model output (the distribution inputs) is used.
        dist_inputs, _ = policy.model({
            SampleBatch.CUR_OBS: observation
        }, [], None)
        dist = policy.dist_class(dist_inputs, policy.model)
        # ``Tensor.numpy()`` only works on CPU tensors; ``.cpu()`` is a no-op
        # when the sample already lives on the CPU and otherwise copies it
        # over, so the conversion cannot fail on a device-placed model.
        action = dist.sample().detach().cpu().numpy()
        action = unbatch_actions(action)
        if add_noise and isinstance(policy.action_space, gym.spaces.Box):
            action += np.random.randn(*action.shape) * policy.action_noise_std
        return action