def _init(self, config, env_creator):
    validate_config(config)
    env_context = EnvContext(config["env_config"] or {}, worker_index=0)
    env = env_creator(env_context)

    self._policy_class = get_policy_class(config)
    self.policy = self._policy_class(env.observation_space, env.action_space,
                                     config)
    self.optimizer = optimizers.SGD(self.policy, config["sgd_stepsize"])

    self.rollouts_used = config["rollouts_used"]
    self.num_rollouts = config["num_rollouts"]
    self.report_length = config["report_length"]

    # Create the shared noise table.
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    logger.info("Creating actors.")
    self.workers = [
        Worker.remote(config, env_creator, noise_id, idx + 1)
        for idx in range((config["num_workers"]))
    ]

    self.episodes_so_far = 0
    self.reward_list = []
    self.tstart = time.time()
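# NOTE: `create_shared_noise` and `SharedNoiseTable` are used above but not
# defined in this excerpt. The sketch below is a hedged reconstruction of the
# pattern (the fixed seed and method names are assumptions, not the exact
# library code): one large block of Gaussian noise is placed in the Ray
# object store once, and every worker indexes into the same table by
# (offset, dim) instead of shipping perturbation vectors between processes.

import numpy as np
import ray


@ray.remote
def create_shared_noise(count):
    """Create a flat float32 table of standard Gaussian noise."""
    seed = 123  # Fixed seed so every process agrees on the table contents.
    return np.random.RandomState(seed).randn(count).astype(np.float32)


class SharedNoiseTable:
    def __init__(self, noise):
        self.noise = noise
        assert self.noise.dtype == np.float32

    def get(self, i, dim):
        # A perturbation is just a contiguous slice of the table.
        return self.noise[i:i + dim]

    def sample_index(self, rng, dim):
        # Random offset such that a `dim`-sized slice still fits.
        return rng.randint(0, len(self.noise) - dim + 1)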
def setup(self, config):
    # Set up our config: Merge the user-supplied config (which could
    # be a partial config dict) with the class' default.
    self.config = self.merge_trainer_configs(
        self.get_default_config(), config, self._allow_unknown_configs)

    # Validate our config dict.
    self.validate_config(self.config)

    # Generate the `self.env_creator` callable to create an env instance.
    self.env_creator = self._get_env_creator_from_env_id(self._env_id)
    # Generate the local env.
    env_context = EnvContext(self.config["env_config"] or {}, worker_index=0)
    env = self.env_creator(env_context)

    self.callbacks = self.config["callbacks"]()

    self._policy_class = get_policy_class(self.config)
    self.policy = self._policy_class(env.observation_space, env.action_space,
                                     self.config)
    self.optimizer = optimizers.SGD(self.policy, self.config["sgd_stepsize"])

    self.rollouts_used = self.config["rollouts_used"]
    self.num_rollouts = self.config["num_rollouts"]
    self.report_length = self.config["report_length"]

    # Create the shared noise table.
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(self.config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    logger.info("Creating actors.")
    self.workers = [
        Worker.remote(self.config, self.env_creator, noise_id, idx + 1)
        for idx in range(self.config["num_workers"])
    ]

    self.episodes_so_far = 0
    self.reward_list = []
    self.tstart = time.time()
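# A hedged usage sketch for the `setup` path above. The trainer class name
# (RLlib's `ARSTrainer`) and the concrete values below are illustrative
# assumptions; the config keys are the ones `setup` actually reads. Because
# `setup` merges the user config with `get_default_config()`, a partial dict
# like this is enough.

import ray
from ray.rllib.agents.ars import ARSTrainer  # assumed import path

ray.init()

config = {
    "env_config": {},          # Passed to the env creator via EnvContext.
    "num_workers": 2,          # Number of remote Worker actors.
    "noise_size": 25_000_000,  # Entries in the shared noise table.
    "sgd_stepsize": 0.01,      # Step size for the SGD optimizer.
    "num_rollouts": 32,        # Perturbations evaluated per iteration.
    "rollouts_used": 16,       # Top rollouts kept for the update.
    "report_length": 10,       # Episodes averaged for reward reporting.
}

trainer = ARSTrainer(config=config, env="CartPole-v1")
result = trainer.train()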
def _init(self, config, env_creator):
    policy_params = {"action_noise_std": 0.01}
    env = env_creator(config["env_config"])
    from ray.rllib import models
    preprocessor = models.ModelCatalog.get_preprocessor(env)

    self.sess = utils.make_session(single_threaded=False)
    self.policy = GenericGaussianPolicy(
        self.sess, env.action_space, env.observation_space, preprocessor,
        config["observation_filter"], config["model"], **policy_params)
    if config["optimizer_type"] == "adam":
        self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
    elif config["optimizer_type"] == "sgd":
        self.optimizer = optimizers.SGD(self.policy, config["stepsize"])
    else:
        raise ValueError("`optimizer_type` must be one of [adam, sgd].")
    self.report_length = config["report_length"]

    # Create the shared noise table.
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    logger.info("Creating actors.")
    self._workers = [
        Worker.remote(config, policy_params, env_creator, noise_id)
        for _ in range(config["num_workers"])
    ]

    self.episodes_so_far = 0
    self.reward_list = []
    self.tstart = time.time()
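# The `optimizers.SGD` / `optimizers.Adam` objects selected above operate on
# the policy's flat parameter vector rather than through a framework graph,
# since ES/ARS never backpropagate. Below is a minimal sketch of that
# interface; the attribute names (`num_params`, `get_flat`) and the momentum
# default are assumptions for illustration, not the exact library code.

import numpy as np


class SGD:
    def __init__(self, policy, stepsize, momentum=0.9):
        self.policy = policy
        self.stepsize = stepsize
        self.momentum = momentum
        self.v = np.zeros(policy.num_params, dtype=np.float32)

    def update(self, gradient):
        # Momentum-averaged descent step on the flat parameter vector;
        # returns the new parameters so the caller can broadcast them
        # to the remote workers.
        self.v = self.momentum * self.v + (1.0 - self.momentum) * gradient
        theta = self.policy.get_flat()
        return theta - self.stepsize * self.v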