def _init(self, config, env_creator):
    validate_config(config)
    env_context = EnvContext(config["env_config"] or {}, worker_index=0)
    env = env_creator(env_context)

    self._policy_class = get_policy_class(config)
    self.policy = self._policy_class(env.observation_space, env.action_space,
                                     config)
    self.optimizer = optimizers.SGD(self.policy, config["sgd_stepsize"])

    self.rollouts_used = config["rollouts_used"]
    self.num_rollouts = config["num_rollouts"]
    self.report_length = config["report_length"]

    # Create the shared noise table.
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    logger.info("Creating actors.")
    self.workers = [
        Worker.remote(config, env_creator, noise_id, idx + 1)
        for idx in range((config["num_workers"]))
    ]

    self.episodes_so_far = 0
    self.reward_list = []
    self.tstart = time.time()
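# NOTE: `create_shared_noise` and `SharedNoiseTable` are used above but not
# defined in this excerpt. The sketch below is a hedged reconstruction of the
# pattern (the fixed seed and method names are assumptions, not the exact
# library code): one large block of Gaussian noise is placed in the Ray
# object store once, and every worker indexes into the same table by
# (offset, dim) instead of shipping perturbation vectors between processes.

import numpy as np
import ray


@ray.remote
def create_shared_noise(count):
    """Create a flat float32 table of standard Gaussian noise."""
    seed = 123  # Fixed seed so every process agrees on the table contents.
    return np.random.RandomState(seed).randn(count).astype(np.float32)


class SharedNoiseTable:
    def __init__(self, noise):
        self.noise = noise
        assert self.noise.dtype == np.float32

    def get(self, i, dim):
        # A perturbation is just a contiguous slice of the table.
        return self.noise[i:i + dim]

    def sample_index(self, rng, dim):
        # Random offset such that a `dim`-sized slice still fits.
        return rng.randint(0, len(self.noise) - dim + 1)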
def setup(self, config):
    # Set up our config: Merge the user-supplied config (which could
    # be a partial config dict) with the class' default.
    self.config = self.merge_trainer_configs(
        self.get_default_config(), config, self._allow_unknown_configs)

    # Validate our config dict.
    self.validate_config(self.config)

    # Generate the `self.env_creator` callable to create an env instance.
    self.env_creator = self._get_env_creator_from_env_id(self._env_id)
    # Generate the local env.
    env_context = EnvContext(self.config["env_config"] or {}, worker_index=0)
    env = self.env_creator(env_context)

    self.callbacks = self.config["callbacks"]()

    self._policy_class = get_policy_class(self.config)
    self.policy = self._policy_class(env.observation_space, env.action_space,
                                     self.config)
    self.optimizer = optimizers.SGD(self.policy, self.config["sgd_stepsize"])

    self.rollouts_used = self.config["rollouts_used"]
    self.num_rollouts = self.config["num_rollouts"]
    self.report_length = self.config["report_length"]

    # Create the shared noise table.
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(self.config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    logger.info("Creating actors.")
    self.workers = [
        Worker.remote(self.config, self.env_creator, noise_id, idx + 1)
        for idx in range(self.config["num_workers"])
    ]

    self.episodes_so_far = 0
    self.reward_list = []
    self.tstart = time.time()
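# A hedged usage sketch for the `setup` path above. The trainer class name
# (RLlib's `ARSTrainer`) and the concrete values below are illustrative
# assumptions; the config keys are the ones `setup` actually reads. Because
# `setup` merges the user config with `get_default_config()`, a partial dict
# like this is enough.

import ray
from ray.rllib.agents.ars import ARSTrainer  # assumed import path

ray.init()

config = {
    "env_config": {},          # Passed to the env creator via EnvContext.
    "num_workers": 2,          # Number of remote Worker actors.
    "noise_size": 25_000_000,  # Entries in the shared noise table.
    "sgd_stepsize": 0.01,      # Step size for the SGD optimizer.
    "num_rollouts": 32,        # Perturbations evaluated per iteration.
    "rollouts_used": 16,       # Top rollouts kept for the update.
    "report_length": 10,       # Episodes averaged for reward reporting.
}

trainer = ARSTrainer(config=config, env="CartPole-v1")
result = trainer.train()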
def _init(self, config, env_creator):
    policy_params = {"action_noise_std": 0.01}
    env = env_creator(config["env_config"])
    from ray.rllib import models
    preprocessor = models.ModelCatalog.get_preprocessor(env)

    self.sess = utils.make_session(single_threaded=False)
    self.policy = GenericGaussianPolicy(
        self.sess, env.action_space, env.observation_space, preprocessor,
        config["observation_filter"], config["model"], **policy_params)
    if config["optimizer_type"] == "adam":
        self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
    elif config["optimizer_type"] == "sgd":
        self.optimizer = optimizers.SGD(self.policy, config["stepsize"])
    else:
        raise ValueError("`optimizer_type` must be one of [adam, sgd].")
    self.report_length = config["report_length"]

    # Create the shared noise table.
    logger.info("Creating shared noise table.")
    noise_id = create_shared_noise.remote(config["noise_size"])
    self.noise = SharedNoiseTable(ray.get(noise_id))

    # Create the actors.
    logger.info("Creating actors.")
    self._workers = [
        Worker.remote(config, policy_params, env_creator, noise_id)
        for _ in range(config["num_workers"])
    ]

    self.episodes_so_far = 0
    self.reward_list = []
    self.tstart = time.time()
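# The `optimizers.SGD` / `optimizers.Adam` objects selected above operate on
# the policy's flat parameter vector rather than through a framework graph,
# since ES/ARS never backpropagate. Below is a minimal sketch of that
# interface; the attribute names (`num_params`, `get_flat`) and the momentum
# default are assumptions for illustration, not the exact library code.

import numpy as np


class SGD:
    def __init__(self, policy, stepsize, momentum=0.9):
        self.policy = policy
        self.stepsize = stepsize
        self.momentum = momentum
        self.v = np.zeros(policy.num_params, dtype=np.float32)

    def update(self, gradient):
        # Momentum-averaged descent step on the flat parameter vector;
        # returns the new parameters so the caller can broadcast them
        # to the remote workers.
        self.v = self.momentum * self.v + (1.0 - self.momentum) * gradient
        theta = self.policy.get_flat()
        return theta - self.stepsize * self.v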