Example #1
 def test_basic(self):
     local = _MockWorker()
     remotes = ray.remote(_MockWorker)
     remote_workers = [remotes.remote() for i in range(5)]
     workers = WorkerSet._from_existing(local, remote_workers)
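     # grads_per_step=10: step() applies ten asynchronously computed remote
     # gradients to the local worker before returning.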
     test_optimizer = AsyncGradientsOptimizer(workers, grads_per_step=10)
     test_optimizer.step()
     self.assertTrue(all(local.get_weights() == 0))
Example #2
 def testBasic(self):
     ray.init(num_cpus=4, object_store_memory=1000 * 1024 * 1024)
     local = _MockWorker()
     remotes = ray.remote(_MockWorker)
     remote_workers = [remotes.remote() for i in range(5)]
     workers = WorkerSet._from_existing(local, remote_workers)
     test_optimizer = AsyncGradientsOptimizer(workers, grads_per_step=10)
     test_optimizer.step()
     self.assertTrue(all(local.get_weights() == 0))
Example #3
 def testBasic(self):
     ray.init(num_cpus=4)
     local = _MockEvaluator()
     remotes = ray.remote(_MockEvaluator)
     remote_evaluators = [remotes.remote() for i in range(5)]
     test_optimizer = AsyncGradientsOptimizer(local, remote_evaluators,
                                              {"grads_per_step": 10})
     test_optimizer.step()
     self.assertTrue(all(local.get_weights() == 0))
Example #4
class BCAgent(Agent):
    _agent_name = "BC"
    _default_config = DEFAULT_CONFIG
    _allow_unknown_configs = True

    @classmethod
    def default_resource_request(cls, config):
        cf = merge_dicts(cls._default_config, config)
        if cf["use_gpu_for_workers"]:
            num_gpus_per_worker = 1
        else:
            num_gpus_per_worker = 0
        return Resources(
            cpu=1,
            gpu=cf["gpu"] and 1 or 0,
            extra_cpu=cf["num_workers"],
            extra_gpu=num_gpus_per_worker * cf["num_workers"])

    def _init(self):
        self.local_evaluator = BCEvaluator(self.env_creator, self.config,
                                           self.logdir)
        if self.config["use_gpu_for_workers"]:
            remote_cls = GPURemoteBCEvaluator
        else:
            remote_cls = RemoteBCEvaluator
        self.remote_evaluators = [
            remote_cls.remote(self.env_creator, self.config, self.logdir)
            for _ in range(self.config["num_workers"])
        ]
        self.optimizer = AsyncGradientsOptimizer(self.local_evaluator,
                                                 self.remote_evaluators,
                                                 self.config["optimizer"])

    def _train(self):
        self.optimizer.step()
        metric_lists = [
            re.get_metrics.remote() for re in self.remote_evaluators
        ]
        total_samples = 0
        total_loss = 0
        for metrics in metric_lists:
            for m in ray.get(metrics):
                total_samples += m["num_samples"]
                total_loss += m["loss"]
        result = dict(
            mean_loss=total_loss / total_samples,
            timesteps_this_iter=total_samples,
        )
        return result

    def compute_action(self, observation):
        action, info = self.local_evaluator.policy.compute(observation)
        return action
Example #5
File: a3c.py  Project: velconia/ray
    def _init(self):
        if self.config["use_pytorch"]:
            from ray.rllib.a3c.a3c_torch_policy import A3CTorchPolicyGraph
            self.policy_cls = A3CTorchPolicyGraph
        else:
            from ray.rllib.a3c.a3c_tf_policy import A3CPolicyGraph
            self.policy_cls = A3CPolicyGraph

        if self.config["use_pytorch"]:
            session_creator = None
        else:
            import tensorflow as tf

            def session_creator():
                return tf.Session(
                    config=tf.ConfigProto(intra_op_parallelism_threads=1,
                                          inter_op_parallelism_threads=1,
                                          gpu_options=tf.GPUOptions(
                                              allow_growth=True)))

        remote_cls = CommonPolicyEvaluator.as_remote(
            num_gpus=1 if self.config["use_gpu_for_workers"] else 0)
        self.local_evaluator = CommonPolicyEvaluator(
            self.env_creator,
            self.config["multiagent"]["policy_graphs"] or self.policy_cls,
            policy_mapping_fn=self.config["multiagent"]["policy_mapping_fn"],
            batch_steps=self.config["batch_size"],
            batch_mode="truncate_episodes",
            tf_session_creator=session_creator,
            env_config=self.config["env_config"],
            model_config=self.config["model"],
            policy_config=self.config,
            num_envs=self.config["num_envs"])
        self.remote_evaluators = [
            remote_cls.remote(
                self.env_creator,
                self.config["multiagent"]["policy_graphs"] or self.policy_cls,
                policy_mapping_fn=(
                    self.config["multiagent"]["policy_mapping_fn"]),
                batch_steps=self.config["batch_size"],
                batch_mode="truncate_episodes",
                sample_async=True,
                tf_session_creator=session_creator,
                env_config=self.config["env_config"],
                model_config=self.config["model"],
                policy_config=self.config,
                num_envs=self.config["num_envs"],
                worker_index=i + 1) for i in range(self.config["num_workers"])
        ]

        self.optimizer = AsyncGradientsOptimizer(self.config["optimizer"],
                                                 self.local_evaluator,
                                                 self.remote_evaluators)
Example #6
File: bc.py  Project: velconia/ray
 def _init(self):
     self.local_evaluator = BCEvaluator(
         self.env_creator, self.config, self.logdir)
     if self.config["use_gpu_for_workers"]:
         remote_cls = GPURemoteBCEvaluator
     else:
         remote_cls = RemoteBCEvaluator
     self.remote_evaluators = [
         remote_cls.remote(self.env_creator, self.config, self.logdir)
         for _ in range(self.config["num_workers"])]
     self.optimizer = AsyncGradientsOptimizer(
         self.config["optimizer"], self.local_evaluator,
         self.remote_evaluators)
Example #7
    def _init(self):
        if self.config["use_pytorch"]:
            from ray.rllib.agents.a3c.a3c_torch_policy_graph import \
                A3CTorchPolicyGraph
            policy_cls = A3CTorchPolicyGraph
        else:
            from ray.rllib.agents.a3c.a3c_tf_policy_graph import A3CPolicyGraph
            policy_cls = A3CPolicyGraph

        self.local_evaluator = self.make_local_evaluator(
            self.env_creator, policy_cls)
        self.remote_evaluators = self.make_remote_evaluators(
            self.env_creator, policy_cls, self.config["num_workers"],
            {"num_gpus": 1 if self.config["use_gpu_for_workers"] else 0})
        self.optimizer = AsyncGradientsOptimizer(self.local_evaluator,
                                                 self.remote_evaluators,
                                                 self.config["optimizer"])
Example #8
def make_async_optimizer(workers, config):
    return AsyncGradientsOptimizer(workers, **config["optimizer"])
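For reference, a minimal driver-side sketch of how this factory could be wired up, reusing the _MockWorker helper and the WorkerSet._from_existing / grads_per_step usage from Examples #1 and #2. The nested config layout is an assumption based on the **config["optimizer"] unpacking above, and imports of the RLlib test helpers are omitted, as in the examples:

import ray

ray.init(num_cpus=4)
local = _MockWorker()
remotes = ray.remote(_MockWorker)
remote_workers = [remotes.remote() for _ in range(5)]
workers = WorkerSet._from_existing(local, remote_workers)

# The factory unpacks config["optimizer"] into keyword arguments,
# so grads_per_step is forwarded to AsyncGradientsOptimizer.
optimizer = make_async_optimizer(workers, {"optimizer": {"grads_per_step": 10}})
optimizer.step()
ray.shutdown()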
Example #9
File: a3c.py  Project: wanghuimu/ray
 def _make_optimizer(self):
     return AsyncGradientsOptimizer(self.local_evaluator,
                                    self.remote_evaluators,
                                    self.config["optimizer"])
Example #10
class A3CAgent(Agent):
    """A3C implementations in TensorFlow and PyTorch."""

    _agent_name = "A3C"
    _default_config = DEFAULT_CONFIG

    @classmethod
    def default_resource_request(cls, config):
        cf = merge_dicts(cls._default_config, config)
        return Resources(
            cpu=1,
            gpu=0,
            extra_cpu=cf["num_workers"],
            extra_gpu=cf["use_gpu_for_workers"] and cf["num_workers"] or 0)

    def _init(self):
        if self.config["use_pytorch"]:
            from ray.rllib.agents.a3c.a3c_torch_policy_graph import \
                A3CTorchPolicyGraph
            policy_cls = A3CTorchPolicyGraph
        else:
            from ray.rllib.agents.a3c.a3c_tf_policy_graph import A3CPolicyGraph
            policy_cls = A3CPolicyGraph

        self.local_evaluator = self.make_local_evaluator(
            self.env_creator, policy_cls)
        self.remote_evaluators = self.make_remote_evaluators(
            self.env_creator, policy_cls, self.config["num_workers"],
            {"num_gpus": 1 if self.config["use_gpu_for_workers"] else 0})
        self.optimizer = AsyncGradientsOptimizer(self.local_evaluator,
                                                 self.remote_evaluators,
                                                 self.config["optimizer"])

    def _train(self):
        prev_steps = self.optimizer.num_steps_sampled
        self.optimizer.step()
        FilterManager.synchronize(self.local_evaluator.filters,
                                  self.remote_evaluators)
        result = self.optimizer.collect_metrics()
        result.update(timesteps_this_iter=self.optimizer.num_steps_sampled -
                      prev_steps)
        return result

    def _stop(self):
        # workaround for https://github.com/ray-project/ray/issues/1516
        for ev in self.remote_evaluators:
            ev.__ray_terminate__.remote()

    def _save(self, checkpoint_dir):
        checkpoint_path = os.path.join(checkpoint_dir,
                                       "checkpoint-{}".format(self.iteration))
        agent_state = ray.get(
            [a.save.remote() for a in self.remote_evaluators])
        extra_data = {
            "remote_state": agent_state,
            "local_state": self.local_evaluator.save()
        }
        pickle.dump(extra_data, open(checkpoint_path + ".extra_data", "wb"))
        return checkpoint_path

    def _restore(self, checkpoint_path):
        extra_data = pickle.load(open(checkpoint_path + ".extra_data", "rb"))
        ray.get([
            a.restore.remote(o)
            for a, o in zip(self.remote_evaluators, extra_data["remote_state"])
        ])
        self.local_evaluator.restore(extra_data["local_state"])
Example #11
 def make_async_optimizer(workers, config):
     return AsyncGradientsOptimizer(workers, grads_per_step=100)
Example #12
File: a3c.py  Project: velconia/ray
class A3CAgent(Agent):
    _agent_name = "A3C"
    _default_config = DEFAULT_CONFIG

    @classmethod
    def default_resource_request(cls, config):
        cf = dict(cls._default_config, **config)
        return Resources(
            cpu=1,
            gpu=0,
            extra_cpu=cf["num_workers"],
            extra_gpu=cf["use_gpu_for_workers"] and cf["num_workers"] or 0)

    def _init(self):
        if self.config["use_pytorch"]:
            from ray.rllib.a3c.a3c_torch_policy import A3CTorchPolicyGraph
            self.policy_cls = A3CTorchPolicyGraph
        else:
            from ray.rllib.a3c.a3c_tf_policy import A3CPolicyGraph
            self.policy_cls = A3CPolicyGraph

        if self.config["use_pytorch"]:
            session_creator = None
        else:
            import tensorflow as tf

            def session_creator():
                return tf.Session(
                    config=tf.ConfigProto(intra_op_parallelism_threads=1,
                                          inter_op_parallelism_threads=1,
                                          gpu_options=tf.GPUOptions(
                                              allow_growth=True)))

        remote_cls = CommonPolicyEvaluator.as_remote(
            num_gpus=1 if self.config["use_gpu_for_workers"] else 0)
        self.local_evaluator = CommonPolicyEvaluator(
            self.env_creator,
            self.config["multiagent"]["policy_graphs"] or self.policy_cls,
            policy_mapping_fn=self.config["multiagent"]["policy_mapping_fn"],
            batch_steps=self.config["batch_size"],
            batch_mode="truncate_episodes",
            tf_session_creator=session_creator,
            env_config=self.config["env_config"],
            model_config=self.config["model"],
            policy_config=self.config,
            num_envs=self.config["num_envs"])
        self.remote_evaluators = [
            remote_cls.remote(
                self.env_creator,
                self.config["multiagent"]["policy_graphs"] or self.policy_cls,
                policy_mapping_fn=(
                    self.config["multiagent"]["policy_mapping_fn"]),
                batch_steps=self.config["batch_size"],
                batch_mode="truncate_episodes",
                sample_async=True,
                tf_session_creator=session_creator,
                env_config=self.config["env_config"],
                model_config=self.config["model"],
                policy_config=self.config,
                num_envs=self.config["num_envs"],
                worker_index=i + 1) for i in range(self.config["num_workers"])
        ]

        self.optimizer = AsyncGradientsOptimizer(self.config["optimizer"],
                                                 self.local_evaluator,
                                                 self.remote_evaluators)

    def _train(self):
        self.optimizer.step()
        FilterManager.synchronize(self.local_evaluator.filters,
                                  self.remote_evaluators)
        result = collect_metrics(self.local_evaluator, self.remote_evaluators)
        result = result._replace(info=self.optimizer.stats())
        return result

    def _stop(self):
        # workaround for https://github.com/ray-project/ray/issues/1516
        for ev in self.remote_evaluators:
            ev.__ray_terminate__.remote()

    def _save(self, checkpoint_dir):
        checkpoint_path = os.path.join(checkpoint_dir,
                                       "checkpoint-{}".format(self.iteration))
        agent_state = ray.get(
            [a.save.remote() for a in self.remote_evaluators])
        extra_data = {
            "remote_state": agent_state,
            "local_state": self.local_evaluator.save()
        }
        pickle.dump(extra_data, open(checkpoint_path + ".extra_data", "wb"))
        return checkpoint_path

    def _restore(self, checkpoint_path):
        extra_data = pickle.load(open(checkpoint_path + ".extra_data", "rb"))
        ray.get([
            a.restore.remote(o)
            for a, o in zip(self.remote_evaluators, extra_data["remote_state"])
        ])
        self.local_evaluator.restore(extra_data["local_state"])

    def compute_action(self, observation, state=None):
        if state is None:
            state = []
        obs = self.local_evaluator.filters["default"](observation,
                                                      update=False)
        return self.local_evaluator.for_policy(
            lambda p: p.compute_single_action(obs, state, is_training=False)[0])