Python unbatch_actions Examples

Programming Language: Python

Namespace/Package Name: ray.rllib.evaluation.sampler

Method/Function: unbatch_actions

Examples at hotexamples.com: 3

Python unbatch_actions - 3 examples found. These are the top-rated real-world Python examples of ray.rllib.evaluation.sampler.unbatch_actions, extracted from open-source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: ars_tf_policy.py Project: zhangjiekui/ray

 def compute_actions(self, observation, add_noise=False, update=True):
     """Compute an action for one observation, optionally perturbed by noise.

     The observation goes through ``self.preprocessor.transform``, is indexed
     with ``[None]`` (for NumPy arrays this prepends a batch axis), and is
     passed through ``self.observation_filter`` before being fed to
     ``self.sampler`` via ``self.sess.run``.

     Args:
         observation: Raw observation handed to the preprocessor.
         add_noise: If truthy and ``self.action_space`` is a
             ``gym.spaces.Box``, Gaussian noise scaled by
             ``self.action_noise_std`` is added in place to the action.
         update: Forwarded unchanged to ``self.observation_filter``.

     Returns:
         Whatever ``unbatch_actions`` yields for the sampled output, with
         the noise applied when requested.
     """
     preprocessed = self.preprocessor.transform(observation)
     filtered = self.observation_filter(preprocessed[None], update=update)
     sampled = self.sess.run(
         self.sampler, feed_dict={self.inputs: filtered})
     action = unbatch_actions(sampled)

     # Noise is only meaningful for continuous (Box) action spaces.
     if not add_noise:
         return action
     if isinstance(self.action_space, gym.spaces.Box):
         noise = np.random.randn(*action.shape) * self.action_noise_std
         # Kept as an in-place add, exactly like the original statement.
         action += noise
     return action

Example #2

Show file

 def compute_actions(self, observation, add_noise=False, update=True):
     """Compute unbatched actions for one observation.

     The observation is transformed by ``self.preprocessor``, indexed with
     ``[None]`` (for NumPy arrays this prepends a batch axis), filtered by
     ``self.observation_filter`` and fed to ``self.sampler`` through
     ``self.sess.run``.

     Args:
         observation: Raw observation handed to the preprocessor.
         add_noise: If truthy, ``self._add_noise`` is mapped over the
             sampled actions together with ``self.action_space_struct``
             before unbatching.
         update: Forwarded unchanged to ``self.observation_filter``.

     Returns:
         The result of ``unbatch_actions`` applied to the (possibly noisy)
         sampled actions.
     """
     preprocessed = self.preprocessor.transform(observation)
     filtered = self.observation_filter(preprocessed[None], update=update)

     # Per the original author's note, the sampler output is a list of
     # (component) batches.
     batched = self.sess.run(
         self.sampler, feed_dict={self.inputs: filtered})

     if add_noise:
         # Noise is applied component-wise while the actions are still
         # batched, pairing each component with its action-space entry.
         batched = tree.map_structure(
             self._add_noise, batched, self.action_space_struct)

     # Turn the batched components into a list of single actions.
     return unbatch_actions(batched)

Example #3

Show file

    def _compute_actions(policy, obs_batch, add_noise=False, update=True):
        """Sample an action from ``policy`` for ``obs_batch``.

        The input is transformed by ``policy.preprocessor``, indexed with
        ``[None]`` (for NumPy arrays this prepends a batch axis), filtered
        by ``policy.observation_filter`` and converted with
        ``convert_to_torch_tensor``. The model output parameterizes
        ``policy.dist_class``, from which one sample is drawn and
        converted to NumPy.

        Args:
            policy: Object providing ``preprocessor``, ``observation_filter``,
                ``model``, ``dist_class``, ``action_space`` and
                ``action_noise_std``.
            obs_batch: Raw observation handed to the preprocessor.
            add_noise: If truthy and ``policy.action_space`` is a
                ``gym.spaces.Box``, Gaussian noise scaled by
                ``policy.action_noise_std`` is added in place.
            update: Forwarded unchanged to ``policy.observation_filter``.

        Returns:
            Whatever ``unbatch_actions`` yields for the sampled action, with
            the noise applied when requested.
        """
        transformed = policy.preprocessor.transform(obs_batch)
        filtered = policy.observation_filter(transformed[None], update=update)
        obs_tensor = convert_to_torch_tensor(filtered)

        # The model returns a pair; only its first element is used here.
        dist_inputs, _ignored = policy.model(
            {SampleBatch.CUR_OBS: obs_tensor}, [], None)
        action_dist = policy.dist_class(dist_inputs, policy.model)

        # Detach from the autograd graph before converting to NumPy.
        sampled = action_dist.sample().detach().numpy()
        action = unbatch_actions(sampled)

        if not add_noise:
            return action
        if isinstance(policy.action_space, gym.spaces.Box):
            noise = np.random.randn(*action.shape) * policy.action_noise_std
            # Kept as an in-place add, exactly like the original statement.
            action += noise
        return action