Example #1
def test_bad_resources(ray_start_2_cpus):
    with pytest.raises(ValueError):
        WorkerGroup(num_workers=-1)

    with pytest.raises(ValueError):
        WorkerGroup(num_cpus_per_worker=-1)

    with pytest.raises(ValueError):
        WorkerGroup(num_gpus_per_worker=-1)
Example #2
    def start(self,
              initialization_hook: Optional[Callable[[], None]] = None,
              train_cls: Optional[Type] = None,
              train_cls_args: Optional[Tuple] = None,
              train_cls_kwargs: Optional[Dict] = None):
        """Starts the worker group."""
        self._create_placement_group()
        placement_group = self._placement_group or "default"
        self.worker_group = WorkerGroup(
            num_workers=self._num_workers,
            num_cpus_per_worker=self._num_cpus_per_worker,
            num_gpus_per_worker=self._num_gpus_per_worker,
            additional_resources_per_worker=self._additional_resources_per_worker,
            actor_cls=train_cls,
            actor_cls_args=train_cls_args,
            actor_cls_kwargs=train_cls_kwargs,
            placement_group=placement_group)
        try:
            if initialization_hook:
                self._initialization_hook = initialization_hook
                self.worker_group.execute(initialization_hook)

            share_cuda_visible_devices_enabled = bool(
                env_integer(ENABLE_SHARE_CUDA_VISIBLE_DEVICES_ENV,
                            self._backend.share_cuda_visible_devices))

            if (self._num_gpus_per_worker > 0
                    and share_cuda_visible_devices_enabled):
                self._share_cuda_visible_devices()
            self._backend.on_start(self.worker_group, self._backend_config)
        except RayActorError as exc:
            logger.exception(str(exc))
            self._increment_failures()
            self._restart()
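The share_cuda_visible_devices check above combines an environment-variable override with the backend's default. Below is a minimal sketch of that pattern; the env_integer semantics and the placeholder variable name are assumptions for illustration, not the verified Ray implementation.

import os

# Sketch of an env_integer-style helper (assumed semantics): read an integer
# from the environment, falling back to the given default when unset.
def env_integer(key, default):
    value = os.environ.get(key)
    return int(value) if value is not None else default

# The backend default is overridden only when the environment variable is set.
# "ENABLE_SHARE_CUDA_VISIBLE_DEVICES" is a placeholder name for illustration.
backend_default = 1
share_enabled = bool(env_integer("ENABLE_SHARE_CUDA_VISIBLE_DEVICES", backend_default))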
Example #3
def test_worker_creation_num_cpus(ray_start_2_cpus):
    assert ray.available_resources()["CPU"] == 2
    wg = WorkerGroup(num_cpus_per_worker=2)
    time.sleep(1)
    assert len(wg.workers) == 1
    # Make sure both CPUs are being used by the actor.
    assert "CPU" not in ray.available_resources()
    wg.shutdown()
Example #4
def test_worker_restart(ray_start_2_cpus):
    wg = WorkerGroup(num_workers=2)
    with pytest.raises(RuntimeError):
        wg.start()
    # Avoid race condition.
    time.sleep(1)
    wg.shutdown(0)
    wg.start()
    wg.execute(lambda: 1)
Example #5
def test_placement_group(ray_start_2_cpus):
    """Tests that workers can be removed and added to a placement group."""
    num_workers = 2
    bundle = {"CPU": 1}
    bundles = [bundle.copy() for _ in range(num_workers)]
    placement_group = ray.util.placement_group(bundles)
    wg = WorkerGroup(num_workers=num_workers, placement_group=placement_group)
    wg.remove_workers([0])
    wg.add_workers(1)
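Example #5 creates the placement group inline and never tears it down. The sketch below fills in the surrounding steps: waiting for the placement group to become ready before constructing the WorkerGroup, and cleaning up afterwards. The import path and the init/cleanup calls are illustrative additions, not part of the original test.

import ray
from ray.train.worker_group import WorkerGroup  # import path is an assumption

ray.init(num_cpus=2)

# Reserve one CPU per worker and wait until the reservation is fulfilled.
bundles = [{"CPU": 1} for _ in range(2)]
pg = ray.util.placement_group(bundles)
ray.get(pg.ready())

# Workers are scheduled into the placement group's bundles.
wg = WorkerGroup(num_workers=2, placement_group=pg)
print(wg.execute(lambda: "ok"))  # one result per worker

wg.shutdown()
ray.util.remove_placement_group(pg)
ray.shutdown()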
Example #6
def test_worker_shutdown(ray_start_2_cpus):
    assert ray.available_resources()["CPU"] == 2
    wg = WorkerGroup(num_workers=2)
    time.sleep(1)
    assert "CPU" not in ray.available_resources()
    assert len(ray.state.actors()) == 2
    wg.shutdown()
    time.sleep(1)
    assert ray.available_resources()["CPU"] == 2

    with pytest.raises(RuntimeError):
        wg.execute(lambda: 1)
Example #7
def test_execute_single(ray_start_2_cpus):
    wg = WorkerGroup(num_workers=2)

    def f():
        import os
        os.environ["TEST"] = "1"

    wg.execute_single(1, f)

    def check():
        import os
        return os.environ.get("TEST", "0")

    assert wg.execute(check) == ["0", "1"]
Example #8
def test_execute_args(ray_start_2_cpus):
    wg = WorkerGroup(num_workers=2)
    outputs = wg.execute(lambda x: x, 1)
    assert len(outputs) == 2
    assert all(o == 1 for o in outputs)
Example #9
def test_execute_async(ray_start_2_cpus):
    wg = WorkerGroup(num_workers=2)
    futures = wg.execute_async(lambda: 1)
    assert len(futures) == 2
    outputs = ray.get(futures)
    assert all(o == 1 for o in outputs)
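Examples #7 through #9 exercise the three call styles on the same kind of group: execute runs on every worker and blocks for results, execute_single targets one worker by index, and execute_async returns object refs. A compact sketch combining them, under the same import assumption as above:

import ray
from ray.train.worker_group import WorkerGroup  # import path is an assumption

ray.init(num_cpus=2)
wg = WorkerGroup(num_workers=2)

# execute: run the function on every worker and block for the results.
print(wg.execute(lambda: "hello"))  # -> ["hello", "hello"]

# execute_single: run the function only on the worker with the given index.
wg.execute_single(0, lambda: "only on worker 0")

# execute_async: get Ray object refs back immediately and resolve them later.
futures = wg.execute_async(lambda: 1)
print(ray.get(futures))  # -> [1, 1]

wg.shutdown()
ray.shutdown()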