Python LinearPolicy Examples

Programming Language: Python

Namespace/Package Name: ray.rllib.agents.ars.policies

Method/Function: LinearPolicy

Examples at hotexamples.com: 2

Python LinearPolicy - 2 examples found. These are the top-rated real-world Python examples of ray.rllib.agents.ars.policies.LinearPolicy, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: ars.py Project: yuanfeng0905/ray

    def __init__(self,
                 config,
                 policy_params,
                 env_creator,
                 noise,
                 min_task_runtime=0.2):
        """Store the arguments and build the env, preprocessor, session, policy.

        Args:
            config: Mapping read for the keys "env_config", "policy_type",
                "observation_filter" and, when the policy type is not
                "LinearPolicy", "fcnet_hiddens".
            policy_params: Keyword arguments forwarded unchanged to the
                policy constructor.
            env_creator: Callable invoked with ``config["env_config"]``;
                its result is kept as ``self.env``.
            noise: Value wrapped in a ``SharedNoiseTable`` and kept as
                ``self.noise``.
            min_task_runtime: Stored on the instance; not otherwise used
                in this method.
        """
        self.min_task_runtime = min_task_runtime
        self.config = config
        self.policy_params = policy_params
        self.noise = SharedNoiseTable(noise)

        self.env = env_creator(config["env_config"])
        from ray.rllib import models
        self.preprocessor = models.ModelCatalog.get_preprocessor(self.env)

        self.sess = utils.make_session(single_threaded=True)

        # Any policy type other than "LinearPolicy" falls back to MLPPolicy.
        use_linear = config["policy_type"] == "LinearPolicy"
        policy_cls = policies.LinearPolicy if use_linear else policies.MLPPolicy

        # Both policy classes share the leading positional arguments; the
        # MLP variant takes the hidden-layer setting as one extra argument.
        policy_args = [
            self.sess,
            self.env.action_space,
            self.preprocessor,
            config["observation_filter"],
        ]
        if not use_linear:
            policy_args.append(config["fcnet_hiddens"])

        self.policy = policy_cls(*policy_args, **policy_params)

Example #2

Show file

File: ars.py Project: jiankangren/ray

    def _init(self):
        """Build the policy, optimizer, shared noise table and remote workers.

        All settings are read from ``self.config`` and the environment is
        obtained by calling ``self.env_creator``. The method also resets the
        episode/timestep counters and the reward list, and records the
        current time in ``self.tstart``.
        """
        policy_params = {"action_noise_std": 0.0}

        # Register the linear network.
        utils.register_linear_network()

        env = self.env_creator(self.config["env_config"])
        from ray.rllib import models
        preprocessor = models.ModelCatalog.get_preprocessor(env)

        self.sess = utils.make_session(single_threaded=False)

        # Any policy type other than "LinearPolicy" falls back to MLPPolicy.
        use_linear = self.config["policy_type"] == "LinearPolicy"
        policy_cls = policies.LinearPolicy if use_linear else policies.MLPPolicy

        # Both policy classes share the leading positional arguments; the
        # MLP variant takes the hidden-layer setting as one extra argument.
        policy_args = [
            self.sess,
            env.action_space,
            preprocessor,
            self.config["observation_filter"],
            self.config["model"],
        ]
        if not use_linear:
            policy_args.append(self.config["fcnet_hiddens"])
        self.policy = policy_cls(*policy_args, **policy_params)

        self.optimizer = optimizers.SGD(
            self.policy, self.config["sgd_stepsize"])

        # Copy these settings from the config onto the instance under the
        # same names.
        for setting in ("rollouts_used", "num_rollouts", "report_length"):
            setattr(self, setting, self.config[setting])

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote(self.config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors; each one receives the same noise object id.
        print("Creating actors.")
        actors = []
        for _ in range(self.config["num_workers"]):
            actors.append(
                Worker.remote(self.config, policy_params, self.env_creator,
                              noise_id))
        self.workers = actors

        self.episodes_so_far = self.timesteps_so_far = 0
        self.reward_list = []
        self.tstart = time.time()