Python GenericPolicy Examples

Programming Language: Python

Namespace/Package Name: ray.rllib.agents.es.policies

Method/Function: GenericPolicy

Examples at hotexamples.com: 3

Python GenericPolicy - 3 examples found. These are the top-rated real-world Python examples of ray.rllib.agents.es.policies.GenericPolicy, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

    def _init(self, config, env_creator):
        # PyTorch check.
        if config["use_pytorch"]:
            raise ValueError(
                "ES does not support PyTorch yet! Use tf instead.")

        policy_params = {"action_noise_std": 0.01}

        env_context = EnvContext(config["env_config"] or {}, worker_index=0)
        env = env_creator(env_context)
        from ray.rllib import models
        preprocessor = models.ModelCatalog.get_preprocessor(env)

        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(
            self.sess, env.action_space, env.observation_space, preprocessor,
            config["observation_filter"], config["model"], **policy_params)
        self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
        self.report_length = config["report_length"]

        # Create the shared noise table.
        logger.info("Creating shared noise table.")
        noise_id = create_shared_noise.remote(config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        logger.info("Creating actors.")
        self._workers = [
            Worker.remote(config, policy_params, env_creator, noise_id,
                          idx + 1) for idx in range(config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.reward_list = []
        self.tstart = time.time()

Example #2

Show file

    def _init(self):
        """Build the local policy, optimizer, noise table and remote workers.

        Reads ``self.env_creator`` and the ``self.config`` keys
        "env_config", "observation_filter", "stepsize", "report_length",
        "noise_size" and "num_workers".
        """
        cfg = self.config
        make_env = self.env_creator

        # Keyword arguments passed to the local policy and to each worker.
        policy_kwargs = {"action_noise_std": 0.01}

        local_env = make_env(cfg["env_config"])
        from ray.rllib import models as rllib_models
        obs_preprocessor = rllib_models.ModelCatalog.get_preprocessor(
            local_env)

        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(
            self.sess,
            local_env.action_space,
            obs_preprocessor,
            cfg["observation_filter"],
            **policy_kwargs)
        self.optimizer = optimizers.Adam(self.policy, cfg["stepsize"])
        self.report_length = cfg["report_length"]

        # Build the noise table remotely; keep the handle so the workers
        # can be given the same one.
        print("Creating shared noise table.")
        noise_handle = create_shared_noise.remote(cfg["noise_size"])
        noise_data = ray.get(noise_handle)
        self.noise = SharedNoiseTable(noise_data)

        # Launch ``num_workers`` remote workers with identical arguments.
        print("Creating actors.")
        worker_count = cfg["num_workers"]
        self.workers = [
            Worker.remote(cfg, policy_kwargs, make_env, noise_handle)
            for _ in range(worker_count)
        ]

        # Bookkeeping that starts empty; the start time is taken last.
        self.reward_list = []
        self.episodes_so_far = 0
        self.tstart = time.time()

Example #3

Show file

File: es.py Project: tryanswer/ray

    def __init__(self,
                 config,
                 policy_params,
                 env_creator,
                 noise,
                 min_task_runtime=0.2):
        """Store the settings and build this worker's env, session and policy.

        Args:
            config: Mapping read for the keys "env_config" and
                "observation_filter"; also stored as ``self.config``.
            policy_params: Mapping expanded as keyword arguments into
                ``policies.GenericPolicy``; also stored on the instance.
            env_creator: Callable invoked with ``config["env_config"]`` to
                build the environment.
            noise: Value wrapped in a ``SharedNoiseTable``.
            min_task_runtime: Stored as ``self.min_task_runtime``
                (default 0.2).
        """
        # Keep the raw constructor arguments on the instance.
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        # Environment and its matching preprocessor.
        env = env_creator(config["env_config"])
        self.env = env
        from ray.rllib import models as rllib_models
        preprocessor = rllib_models.ModelCatalog.get_preprocessor(env)
        self.preprocessor = preprocessor

        # The session is requested with single_threaded=True here.
        self.sess = utils.make_session(single_threaded=True)
        self.policy = policies.GenericPolicy(
            self.sess,
            env.action_space,
            preprocessor,
            config["observation_filter"],
            **policy_params)