Code example #1
    def _init(self, config, env_creator):
        validate_config(config)
        env_context = EnvContext(config["env_config"] or {}, worker_index=0)
        env = env_creator(env_context)
        self._policy_class = get_policy_class(config)
        self.policy = self._policy_class(obs_space=env.observation_space,
                                         action_space=env.action_space,
                                         config=config)
        self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
        self.report_length = config["report_length"]

        # Create the shared noise table.
        logger.info("Creating shared noise table.")
        noise_id = create_shared_noise.remote(config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        logger.info("Creating actors.")
        self._workers = [
            Worker.remote(config, {}, env_creator, noise_id, idx + 1)
            for idx in range(config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.reward_list = []
        self.tstart = time.time()
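Every variant on this page builds the same shared noise table before spawning workers. For orientation, here is a minimal sketch, consistent with these snippets, of what create_shared_noise and SharedNoiseTable can look like; the real definitions live in es.py, and details such as dtype and seed may differ between versions.

import numpy as np
import ray

@ray.remote
def create_shared_noise(count):
    # One large, fixed-seed block of Gaussian noise, created once and
    # placed in the Ray object store for every worker to read.
    return np.random.RandomState(123).randn(count).astype(np.float64)

class SharedNoiseTable:
    def __init__(self, noise):
        self.noise = noise

    def get(self, i, dim):
        # A perturbation is just a slice of the big table, so workers
        # only exchange integer offsets, never the noise itself.
        return self.noise[i:i + dim]

    def sample_index(self, dim):
        return np.random.randint(0, len(self.noise) - dim + 1)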
Code example #2
    def _init(self):
        policy_params = {"action_noise_std": 0.01}

        env = self.env_creator(self.config["env_config"])
        from ray.rllib import models
        preprocessor = models.ModelCatalog.get_preprocessor(env)

        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(self.sess, env.action_space,
                                             preprocessor,
                                             self.config["observation_filter"],
                                             **policy_params)
        self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])
        self.report_length = self.config["report_length"]

        # Create the shared noise table.
        print("Creating shared noise table.")
        noise_id = create_shared_noise.remote(self.config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        print("Creating actors.")
        self.workers = [
            Worker.remote(self.config, policy_params, self.env_creator,
                          noise_id) for _ in range(self.config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.reward_list = []
        self.tstart = time.time()
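The workers created here are Ray actors; the newer variants (examples #1, #3 and #4) additionally pass a 1-based worker index. A hypothetical skeleton of such an actor, assuming the SharedNoiseTable sketch above; the real Worker in es.py also builds a policy and runs the perturbed rollouts.

import ray

@ray.remote
class Worker:
    def __init__(self, config, policy_params, env_creator, noise_id,
                 worker_index=0):
        # ray.get on the shared object reference yields a zero-copy,
        # read-only view of the noise array, not a per-worker copy.
        self.noise = SharedNoiseTable(ray.get(noise_id))
        self.env = env_creator(config["env_config"])
        self.worker_index = worker_index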
Code example #3
    def _init(self, config, env_creator):
        # PyTorch check.
        if config["use_pytorch"]:
            raise ValueError(
                "ES does not support PyTorch yet! Use tf instead.")

        policy_params = {"action_noise_std": 0.01}

        env_context = EnvContext(config["env_config"] or {}, worker_index=0)
        env = env_creator(env_context)
        from ray.rllib import models
        preprocessor = models.ModelCatalog.get_preprocessor(env)

        self.sess = utils.make_session(single_threaded=False)
        self.policy = policies.GenericPolicy(
            self.sess, env.action_space, env.observation_space, preprocessor,
            config["observation_filter"], config["model"], **policy_params)
        self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
        self.report_length = config["report_length"]

        # Create the shared noise table.
        logger.info("Creating shared noise table.")
        noise_id = create_shared_noise.remote(config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        logger.info("Creating actors.")
        self._workers = [
            Worker.remote(config, policy_params, env_creator, noise_id,
                          idx + 1) for idx in range(config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.reward_list = []
        self.tstart = time.time()
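Examples #1, #3 and #4 wrap env_config in an EnvContext before calling env_creator. EnvContext subclasses dict and adds worker metadata, which is why the local env is built with worker_index=0 while the actors receive idx + 1. A small usage sketch (the import path is current RLlib; older versions may differ):

from ray.rllib.env.env_context import EnvContext

ctx = EnvContext({"render": False}, worker_index=0)
assert ctx["render"] is False  # behaves like the plain config dict
assert ctx.worker_index == 0   # extra metadata available to the env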
Code example #4
File: es.py  Project: ijrsvt/ray
    def setup(self, config):
        # Setup our config: merge the user-supplied config (which could
        # be a partial config dict) with the class' default config.
        self.config = self.merge_trainer_configs(self.get_default_config(),
                                                 config,
                                                 self._allow_unknown_configs)

        # Call super's validation method.
        self.validate_config(self.config)

        # Generate `self.env_creator` callable to create an env instance.
        self.env_creator = self._get_env_creator_from_env_id(self._env_id)
        # Generate the local env.
        env_context = EnvContext(self.config["env_config"] or {},
                                 worker_index=0)
        env = self.env_creator(env_context)

        self.callbacks = self.config["callbacks"]()

        self._policy_class = get_policy_class(self.config)
        self.policy = self._policy_class(
            obs_space=env.observation_space,
            action_space=env.action_space,
            config=self.config,
        )
        self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"])
        self.report_length = self.config["report_length"]

        # Create the shared noise table.
        logger.info("Creating shared noise table.")
        noise_id = create_shared_noise.remote(self.config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        logger.info("Creating actors.")
        self.workers = [
            Worker.remote(self.config, {}, self.env_creator, noise_id, idx + 1)
            for idx in range(self.config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.reward_list = []
        self.tstart = time.time()
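The merge_trainer_configs call in example #4 overlays the user's partial config on the trainer defaults before validation. Purely for illustration (not the actual RLlib implementation), a recursive overlay with that effect could look like this:

import copy

def merge_configs(defaults, overrides):
    # Deep-merge: nested dicts are merged key by key, scalars replaced.
    merged = copy.deepcopy(defaults)
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_configs(merged[key], value)
        else:
            merged[key] = value
    return merged

config = merge_configs(
    {"stepsize": 0.01, "env_config": {}},  # trainer defaults
    {"stepsize": 0.02},                    # partial user config
)
assert config["stepsize"] == 0.02 and config["env_config"] == {}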
Code example #5
    def _init(self, config, env_creator):
        policy_params = {"action_noise_std": 0.01}

        env = env_creator(config["env_config"])
        from ray.rllib import models
        preprocessor = models.ModelCatalog.get_preprocessor(env)

        self.sess = utils.make_session(single_threaded=False)
        self.policy = GenericGaussianPolicy(self.sess, env.action_space,
                                            env.observation_space,
                                            preprocessor,
                                            config["observation_filter"],
                                            config["model"], **policy_params)
        if config["optimizer_type"] == "adam":
            self.optimizer = optimizers.Adam(self.policy, config["stepsize"])
        elif config["optimizer_type"] == "sgd":
            self.optimizer = optimizers.SGD(self.policy, config["stepsize"])
        else:
            raise ValueError(
                "optimizer_type must be one of ['adam', 'sgd'].")
        self.report_length = config["report_length"]

        # Create the shared noise table.
        logger.info("Creating shared noise table.")
        noise_id = create_shared_noise.remote(config["noise_size"])
        self.noise = SharedNoiseTable(ray.get(noise_id))

        # Create the actors.
        logger.info("Creating actors.")
        self._workers = [
            Worker.remote(config, policy_params, env_creator, noise_id)
            for _ in range(config["num_workers"])
        ]

        self.episodes_so_far = 0
        self.reward_list = []
        self.tstart = time.time()
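All five examples hand the whole policy to an optimizer (Adam, or SGD in example #5) that steps a single flat parameter vector, as is usual in evolution strategies. A minimal sketch of such a flat-vector Adam, assuming the policy exposes get_flat_weights()/set_flat_weights(); the real classes live in the ES optimizers module and may differ in detail.

import numpy as np

class Adam:
    def __init__(self, policy, stepsize, beta1=0.9, beta2=0.999,
                 epsilon=1e-8):
        self.policy = policy
        self.stepsize = stepsize
        self.beta1, self.beta2, self.epsilon = beta1, beta2, epsilon
        dim = policy.get_flat_weights().size
        self.m = np.zeros(dim)  # first-moment estimate
        self.v = np.zeros(dim)  # second-moment estimate
        self.t = 0

    def update(self, grad):
        # One bias-corrected Adam step applied to the flat weights.
        self.t += 1
        a = self.stepsize * (np.sqrt(1 - self.beta2 ** self.t) /
                             (1 - self.beta1 ** self.t))
        self.m = self.beta1 * self.m + (1 - self.beta1) * grad
        self.v = self.beta2 * self.v + (1 - self.beta2) * grad * grad
        step = -a * self.m / (np.sqrt(self.v) + self.epsilon)
        theta = self.policy.get_flat_weights()
        self.policy.set_flat_weights(theta + step)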