Example #1
    def test_high_load(self):
        workers = [
            RemoteRLlibActor.remote(sleep_time=random.random() * 2.0) for _ in range(60)
        ]
        manager = AsyncRequestsManager(
            workers,
            max_remote_requests_in_flight_per_worker=2,
            return_object_refs=True,
            ray_wait_timeout_s=0.0,
        )
        num_ready = 0
        for i in range(2000):
            manager.call_on_all_available(lambda w: w.task())
            time.sleep(0.01)

            ready = manager.get_ready()

            for reqs in ready.values():
                num_ready += len(reqs)
                ray.get(reqs)

            for worker in ready.keys():
                worker.task2.remote(1, 3)

        time.sleep(20)

        ready = manager.get_ready()
        num_ready += sum(len(reqs) for reqs in ready.values())

        actually_called = sum(
            ray.get(
                [worker.apply.remote(lambda w: w.num_task_called) for worker in workers]
            )
        )
        assert actually_called == num_ready, (actually_called, num_ready)
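All of these test snippets rely on a small Ray actor, `RemoteRLlibActor`, whose definition is not included in the excerpts. The sketch below is a reconstruction inferred from how the tests use it (a `sleep_time` argument, `task()`, `task2(a, b)`, `apply()`, and a `num_task_called` counter); treat it as an assumption, not the actual RLlib test helper.

import time

import ray


# Hypothetical reconstruction of the test actor used throughout these
# examples; the real RemoteRLlibActor in RLlib's test suite may differ.
@ray.remote
class RemoteRLlibActor:
    def __init__(self, sleep_time):
        self.sleep_time = sleep_time
        self.num_task_called = 0
        self.num_task2_called = 0

    def task(self):
        # Simulate work, then record that the task ran (Example #1 sums
        # num_task_called across workers to verify every request executed).
        time.sleep(self.sleep_time)
        self.num_task_called += 1
        return "done"

    def task2(self, a, b):
        # Two-argument variant used to exercise fn_args / fn_kwargs forwarding.
        time.sleep(self.sleep_time)
        self.num_task2_called += 1
        return "done"

    def apply(self, fn):
        # Run an arbitrary function on the actor, e.g. to read its counters.
        return fn(self)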
Example #2
    def setup(self, config: PartialTrainerConfigDict):
        super().setup(config)

        # Initialize torch process group for distributed data-parallel (DDP) training.
        if self.config["_disable_execution_plan_api"] is True:
            self._curr_learner_info = {}
            ip = ray.get(self.workers.remote_workers()[0].get_node_ip.remote())
            port = ray.get(
                self.workers.remote_workers()[0].find_free_port.remote())
            address = "tcp://{ip}:{port}".format(ip=ip, port=port)
            logger.info(
                "Creating torch process group with leader {}".format(address))

            # Get setup tasks in order to throw errors on failure.
            ray.get([
                worker.setup_torch_data_parallel.remote(
                    url=address,
                    world_rank=i,
                    world_size=len(self.workers.remote_workers()),
                    backend=self.config["torch_distributed_backend"],
                ) for i, worker in enumerate(self.workers.remote_workers())
            ])
            logger.info("Torch process group init completed")
            self._ddppo_worker_manager = AsyncRequestsManager(
                self.workers.remote_workers(),
                max_remote_requests_in_flight_per_worker=1,
                ray_wait_timeout_s=0.03,
            )
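A manager created in `setup()` like `_ddppo_worker_manager` above is typically driven from the algorithm's training loop by alternating `call_on_all_available()` with `get_ready()`. The helper below is only an illustrative sketch of that pattern; its name and the `remote_fn` argument are placeholders, not part of the actual DD-PPO implementation.

def poll_manager_once(manager, remote_fn):
    # Illustrative sketch of one polling step for an AsyncRequestsManager.
    # `remote_fn` stands in for whatever remote call the algorithm schedules,
    # e.g. lambda w: w.sample().
    #
    # Schedule the call on every worker that still has a free in-flight slot
    # (bounded by max_remote_requests_in_flight_per_worker).
    manager.call_on_all_available(remote_fn)

    # Collect whatever has finished since the last poll; with a small
    # ray_wait_timeout_s this returns quickly instead of blocking.
    ready = manager.get_ready()

    # `ready` maps each actor to the list of completed results (object refs
    # instead, if the manager was built with return_object_refs=True).
    results = []
    for _worker, items in ready.items():
        results.extend(items)
    return results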
Example #3
 def test_test_async_requests_task_doesnt_buffering(self):
     """Tests that the async manager drops"""
     workers = [RemoteRLlibActor.remote(sleep_time=0.1) for _ in range(2)]
     manager = AsyncRequestsManager(
         workers, max_remote_requests_in_flight_per_worker=2
     )
     for i in range(8):
         scheduled = manager.call(lambda w: w.task())
         if i < 4:
             assert scheduled, "We should have scheduled the task"
         else:
             assert not scheduled, (
                 "We should not have scheduled the task because"
                 " all workers are busy."
             )
     assert len(manager._pending_remotes) == 4, "We should have 4 pending requests"
     time.sleep(3)
     ready_requests = manager.get_ready()
     for worker in workers:
         if not len(ready_requests[worker]) == 2:
             raise Exception(
                 "We should return the 2 ready requests in this case from each "
                 "actors."
             )
     for _ in range(4):
         manager.call(lambda w: w.task())
     # The in-flight slots freed by get_ready() allow these new tasks to be scheduled.
     time.sleep(3)
     ready_requests = manager.get_ready()
     for worker in workers:
         if not len(ready_requests[worker]) == 2:
             raise Exception(
                 "We should return the 2 ready requests in this case from each "
                 "actors"
             )
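As the test above shows, `call()` returns False instead of queueing the task when every worker is already at its in-flight limit, so the caller has to handle the rejection itself. A minimal, purely illustrative pattern is to drain finished requests and try again:

# Illustrative only: handle call() returning False because every worker is
# already at max_remote_requests_in_flight_per_worker.
if not manager.call(lambda w: w.task()):
    # Collect finished requests to free up in-flight slots, then retry
    # (the retry can still be rejected if nothing has completed yet).
    manager.get_ready()
    manager.call(lambda w: w.task())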
Example #4
    def test_add_remove_actors(self):
        """Tests that the async manager can properly add and remove actors"""

        workers = []
        manager = AsyncRequestsManager(
            workers, max_remote_requests_in_flight_per_worker=2
        )
        if not (
            (
                len(manager._all_workers)
                == len(manager._remote_requests_in_flight)
                == len(manager._pending_to_actor)
                == len(manager._pending_remotes)
                == 0
            )
        ):
            raise ValueError("We should have no workers in this case.")

        assert not manager.call(lambda w: w.task()), (
            "Task shouldn't have been "
            "launched since there are no "
            "workers in the manager."
        )
        worker = RemoteRLlibActor.remote(sleep_time=0.1)
        manager.add_workers(worker)
        manager.call(lambda w: w.task())
        if not (
            len(manager._remote_requests_in_flight[worker])
            == len(manager._pending_to_actor)
            == len(manager._all_workers)
            == len(manager._pending_remotes)
            == 1
        ):
            raise ValueError("We should have 1 worker and 1 pending request")
        time.sleep(3)
        manager.get_ready()
        # test worker removal
        for i in range(2):
            manager.call(lambda w: w.task())
            assert len(manager._pending_remotes) == i + 1
        manager.remove_workers(worker)
        if not ((len(manager._all_workers) == 0)):
            raise ValueError("We should have no workers that we can schedule tasks to")
        if not (
            (len(manager._pending_remotes) == 2 and len(manager._pending_to_actor) == 2)
        ):
            raise ValueError(
                "We should still have 2 pending requests in flight from the worker"
            )
        time.sleep(3)
        result = manager.get_ready()
        if not (
            len(result) == 1
            and len(result[worker]) == 2
            and len(manager._pending_remotes) == 0
            and len(manager._pending_to_actor) == 0
        ):
            raise ValueError(
                "We should have 2 ready results from the worker and no pending requests"
            )
Example #5
 def test_call_to_actor(self):
     workers = [RemoteRLlibActor.remote(sleep_time=0.1) for _ in range(2)]
     worker_not_in_manager = RemoteRLlibActor.remote(sleep_time=0.1)
     manager = AsyncRequestsManager(
         workers, max_remote_requests_in_flight_per_worker=2)
     manager.call(lambda w: w.task(), actor=workers[0])
     time.sleep(3)
     results = manager.get_ready()
     if not (len(results) == 1 and workers[0] in results):
         raise Exception(
             "We should return the 1 ready requests in this case from the worker we "
             "called to")
     with pytest.raises(ValueError,
                        match=".*has not been added to the manager.*"):
         manager.call(lambda w: w.task(), actor=worker_not_in_manager)
Example #6
 def test_round_robin_scheduling(self):
     """Test that the async manager schedules actors in a round robin fashion"""
     workers = [RemoteRLlibActor.remote(sleep_time=0.1) for _ in range(2)]
     manager = AsyncRequestsManager(
         workers, max_remote_requests_in_flight_per_worker=2
     )
     for i in range(4):
         scheduled_actor = workers[i % len(workers)]
         manager.call(lambda w: w.task())
         if i < 2:
             assert len(manager._remote_requests_in_flight[scheduled_actor]) == 1, (
                 "We should have 1 request in flight for the actor that we just "
                 "scheduled on"
             )
         else:
             assert len(manager._remote_requests_in_flight[scheduled_actor]) == 2, (
                 "We should have 2 request in flight for the actor that we just "
                 "scheduled on"
             )
Example #7
 def test_async_requests_manager_num_returns(self):
     """Tests that an async manager can properly handle actors with tasks that
     vary in the amount of time that they take to run"""
     workers = [RemoteRLlibActor.remote(sleep_time=0.1) for _ in range(2)]
     workers += [RemoteRLlibActor.remote(sleep_time=5) for _ in range(2)]
     manager = AsyncRequestsManager(
         workers, max_remote_requests_in_flight_per_worker=1)
     for _ in range(4):
         manager.call(lambda w: w.task())
     time.sleep(3)
     if not len(manager.get_ready()) == 2:
         raise Exception(
             "We should return the 2 ready requests in this case from the actors"
             " that have shorter tasks")
     time.sleep(7)
     if not len(manager.get_ready()) == 2:
         raise Exception(
             "We should return the 2 ready requests in this case from the actors"
             " that have longer tasks")
Example #8
 def test_args_kwargs(self):
     """Tests that the async manager correctly forwards fn_args and fn_kwargs
     to the remote calls it schedules."""
     workers = [RemoteRLlibActor.remote(sleep_time=0.1)]
     manager = AsyncRequestsManager(
         workers, max_remote_requests_in_flight_per_worker=2)
     for _ in range(2):
         manager.call(lambda w, a, b: w.task2(a, b), fn_args=[1, 2])
     time.sleep(3)
     if not len(manager.get_ready()[workers[0]]) == 2:
         raise Exception(
             "We should return the 2 ready requests in this case from the actors"
             " that have shorter tasks")
     for _ in range(2):
         manager.call(lambda w, a, b: w.task2(a, b),
                      fn_kwargs=dict(a=1, b=2))
     time.sleep(3)
     if not len(manager.get_ready()[workers[0]]) == 2:
         raise Exception(
             "We should return the 2 ready requests in this case from the actors"
             " that have longer tasks")
Example #9
    def setup(self, config: PartialAlgorithmConfigDict):
        super().setup(config)

        if self.config["_disable_execution_plan_api"]:
            # Create extra aggregation workers and assign each rollout worker to
            # one of them.
            self.batches_to_place_on_learner = []
            self.batch_being_built = []
            if self.config["num_aggregation_workers"] > 0:
                # This spawns `num_aggregation_workers` actors that aggregate
                # experiences coming from RolloutWorkers in parallel. We force
                # colocation on the same node (localhost) to maximize data bandwidth
                # between them and the learner.
                localhost = platform.node()
                assert localhost != "", (
                    "ERROR: Cannot determine local node name! "
                    "`platform.node()` returned empty string.")
                all_co_located = create_colocated_actors(
                    actor_specs=[
                        # (class, args, kwargs={}, count=1)
                        (
                            AggregatorWorker,
                            [
                                self.config,
                            ],
                            {},
                            self.config["num_aggregation_workers"],
                        )
                    ],
                    node=localhost,
                )
                self._aggregator_workers = [
                    actor for actor_groups in all_co_located
                    for actor in actor_groups
                ]
                self._aggregator_actor_manager = AsyncRequestsManager(
                    self._aggregator_workers,
                    max_remote_requests_in_flight_per_worker=self.
                    config["max_requests_in_flight_per_aggregator_worker"],
                    ray_wait_timeout_s=self.
                    config["timeout_s_aggregator_manager"],
                )

            else:
                # Create our local mixin buffer if the num of aggregation workers is 0.
                self.local_mixin_buffer = MixInMultiAgentReplayBuffer(
                    capacity=(self.config["replay_buffer_num_slots"]
                              if self.config["replay_buffer_num_slots"] > 0
                              else 1),
                    replay_ratio=self.config["replay_ratio"],
                    replay_mode=ReplayMode.LOCKSTEP,
                )

            self._sampling_actor_manager = AsyncRequestsManager(
                self.workers.remote_workers(),
                max_remote_requests_in_flight_per_worker=self.
                config["max_requests_in_flight_per_sampler_worker"],
                return_object_refs=True,
                ray_wait_timeout_s=self.config["timeout_s_sampler_manager"],
            )

            # Create and start the learner thread.
            self._learner_thread = make_learner_thread(
                self.workers.local_worker(), self.config)
            self._learner_thread.start()
            self.workers_that_need_updates = set()
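The `_sampling_actor_manager` created above is what keeps rollout workers sampling while the learner thread trains. The sketch below shows how such a manager is commonly polled; `handle_sample_batch` is a placeholder for whatever the algorithm does with a returned batch (hand it to an aggregation worker, mix it into the local buffer, enqueue it for the learner thread) and is not the actual IMPALA training step.

def poll_sampling_workers(sampling_manager, handle_sample_batch):
    # Illustrative sketch, not IMPALA's real training_step().
    # Ask every rollout worker with a free in-flight slot for a new sample.
    sampling_manager.call_on_all_available(lambda worker: worker.sample())

    # Harvest completed requests; because the manager above was created with
    # return_object_refs=True, the returned values are object refs to batches.
    for worker, batch_refs in sampling_manager.get_ready().items():
        for batch_ref in batch_refs:
            handle_sample_batch(worker, batch_ref)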
Example #10
    def setup(self, config: PartialAlgorithmConfigDict):
        super().setup(config)

        # Shortcut: If the execution plan API is used, the learner thread and
        # replay buffers will be created there, so return early.
        if self.config["_disable_execution_plan_api"] is False:
            return

        # Tag those workers (top 1/3rd indices) that we should collect episodes from
        # for metrics due to `PerWorkerEpsilonGreedy` exploration strategy.
        if self.workers.remote_workers():
            self._remote_workers_for_metrics = self.workers.remote_workers(
            )[-len(self.workers.remote_workers()) // 3:]

        num_replay_buffer_shards = self.config["optimizer"][
            "num_replay_buffer_shards"]

        # Create copy here so that we can modify without breaking other logic
        replay_actor_config = copy.deepcopy(
            self.config["replay_buffer_config"])

        replay_actor_config["capacity"] = (
            self.config["replay_buffer_config"]["capacity"] //
            num_replay_buffer_shards)

        ReplayActor = ray.remote(num_cpus=0)(replay_actor_config["type"])

        # Place all replay buffer shards on the same node as the learner
        # (driver process that runs this execution plan).
        if replay_actor_config["replay_buffer_shards_colocated_with_driver"]:
            self._replay_actors = create_colocated_actors(
                actor_specs=[  # (class, args, kwargs={}, count)
                    (
                        ReplayActor,
                        None,
                        replay_actor_config,
                        num_replay_buffer_shards,
                    )
                ],
                node=platform.node(),  # localhost
            )[0]  # [0]=only one item in `actor_specs`.
        # Place replay buffer shards on any node(s).
        else:
            self._replay_actors = [
                ReplayActor.remote(**replay_actor_config)
                for _ in range(num_replay_buffer_shards)
            ]
        self._replay_actor_manager = AsyncRequestsManager(
            self._replay_actors,
            max_remote_requests_in_flight_per_worker=self.
            config["max_requests_in_flight_per_replay_worker"],
            ray_wait_timeout_s=self.config["timeout_s_replay_manager"],
        )
        self._sampling_actor_manager = AsyncRequestsManager(
            self.workers.remote_workers(),
            max_remote_requests_in_flight_per_worker=self.
            config["max_requests_in_flight_per_sampler_worker"],
            ray_wait_timeout_s=self.config["timeout_s_sampler_manager"],
        )
        self.learner_thread = LearnerThread(self.workers.local_worker())
        self.learner_thread.start()
        self.steps_since_update = defaultdict(int)
        weights = self.workers.local_worker().get_weights()
        self.curr_learner_weights = ray.put(weights)
        self.curr_num_samples_collected = 0
        self.replay_sample_batches = []
        self._num_ts_trained_since_last_target_update = 0
Example #11
 def setup(self, config: PartialTrainerConfigDict):
     super().setup(config)
     self._worker_manager = AsyncRequestsManager(
         self.workers.remote_workers(),
         max_remote_requests_in_flight_per_worker=1)
Example #12
    def setup(self, config: PartialAlgorithmConfigDict):
        # Call super's setup to validate config, create RolloutWorkers
        # (train and eval), etc.
        num_gpus_saved = config["num_gpus"]
        config["num_gpus"] = min(config["num_gpus"], 1)
        super().setup(config)
        self.config["num_gpus"] = num_gpus_saved

        # - Create n policy learner actors (@ray.remote-converted Policies) on
        #   one or more GPU nodes.
        # - On each such node, also locate one replay buffer shard.

        ma_cfg = self.config["multiagent"]
        # By default, set max_num_policies_to_train to the number of policy IDs
        # provided in the multiagent config.
        if self.config["max_num_policies_to_train"] is None:
            self.config["max_num_policies_to_train"] = len(
                self.workers.local_worker().get_policies_to_train())

        # Single CPU replay shard (co-located with GPUs so we can place the
        # policies on the same machine(s)).
        num_gpus = (0.01 if (self.config["num_gpus"]
                             and not self.config["_fake_gpus"]) else 0)
        ReplayActor = ray.remote(
            num_cpus=1,
            num_gpus=num_gpus,
        )(MixInMultiAgentReplayBuffer)

        # Setup remote replay buffer shards and policy learner actors
        # (located on any GPU machine in the cluster):
        replay_actor_args = [
            self.config["replay_buffer_capacity"],
            self.config["replay_buffer_replay_ratio"],
        ]

        # Create a DistributedLearners utility object and set it up with
        # the initial first n learnable policies (found in the config).
        distributed_learners = DistributedLearners(
            config=self.config,
            max_num_policies_to_train=self.config["max_num_policies_to_train"],
            replay_actor_class=ReplayActor,
            replay_actor_args=replay_actor_args,
        )
        for pid, policy_spec in ma_cfg["policies"].items():
            if pid in self.workers.local_worker().get_policies_to_train():
                distributed_learners.add_policy(pid, policy_spec)

        # Store distributed_learners on all RolloutWorkers
        # so they know which replay shard to send samples to.

        def _set_policy_learners(worker):
            worker._distributed_learners = distributed_learners

        ray.get([
            w.apply.remote(_set_policy_learners)
            for w in self.workers.remote_workers()
        ])

        self.distributed_learners = distributed_learners
        self._sampling_actor_manager = AsyncRequestsManager(
            self.workers.remote_workers(),
            max_remote_requests_in_flight_per_worker=self.
            config["max_requests_in_flight_per_sampler_worker"],
            ray_wait_timeout_s=self.config["timeout_s_sampler_manager"],
        )
        policy_actors = [
            policy_actor for _, policy_actor, _ in distributed_learners
        ]
        self._learner_worker_manager = AsyncRequestsManager(
            workers=policy_actors,
            max_remote_requests_in_flight_per_worker=self.
            config["max_requests_in_flight_per_learner_worker"],
            ray_wait_timeout_s=self.config["timeout_s_learner_manager"],
        )