def test_job_level_gc(start_cluster, field, spec_format, tmp_path):
    """Verify a job-level runtime env's local files are GC'd on job exit.

    The test connects with a job-level runtime env, runs a task that needs
    it, disconnects, waits for the local files to be garbage collected, and
    finally reconnects with the same env to make sure it can be set up again.
    """
    # A single-node cluster is required.  On a simulated multi-node cluster
    # the conda installs would run simultaneously, one per node, and because
    # the "nodes" really share one machine the installs would fail.
    cluster, address = start_cluster

    def connect_with_env():
        # The runtime env dict is regenerated for every connection.
        ray.init(
            address,
            runtime_env=generate_runtime_env_dict(field, spec_format, tmp_path),
        )

    connect_with_env()

    @ray.remote
    def f():
        import pip_install_test  # noqa: F401

        return True

    # Running the task guarantees the runtime env has been installed.
    import_ok = ray.get(f.remote())
    assert import_ok

    # While the job is alive, nothing may have been collected yet.
    gced_while_connected = check_local_files_gced(cluster)
    assert not gced_while_connected

    ray.shutdown()

    wait_for_condition(lambda: check_local_files_gced(cluster), timeout=30)

    # Reconnect with the same env.  This confirms the conda env was deleted
    # completely rather than left behind in a corrupted state that would
    # prevent the same env from being installed again.
    connect_with_env()

    import_ok_after_reconnect = ray.get(f.remote())
    assert import_ok_after_reconnect
    def test_detached_actor_gc(
        self, runtime_env_disable_URI_cache, start_cluster, field, spec_format, tmp_path
    ):
        """Verify a detached actor's env files are GC'd only after it exits.

        The creating job disconnects while the detached actor is still alive;
        the local files must survive that, and may only be collected once the
        actor has been killed.
        """
        cluster, address = start_cluster

        job_env = generate_runtime_env_dict(field, spec_format, tmp_path)
        ray.init(address, namespace="test", runtime_env=job_env)

        @ray.remote
        class A:
            def test_import(self):
                import pip_install_test  # noqa: F401

                return True

        actor = A.options(name="test", lifetime="detached").remote()
        ray.get(actor.test_import.remote())

        files_gced = check_local_files_gced(cluster)
        assert not files_gced

        # Drop the creating job and reconnect without any runtime env.
        ray.shutdown()
        ray.init(address, namespace="test")

        # The detached actor is still alive, so its files must remain.
        files_gced = check_local_files_gced(cluster)
        assert not files_gced

        # Look the actor up by name and confirm its env still works.
        actor = ray.get_actor("test")
        import_ok = ray.get(actor.test_import.remote())
        assert import_ok

        ray.kill(actor)

        wait_for_condition(lambda: check_local_files_gced(cluster), timeout=30)
    def test_actor_level_gc(self, runtime_env_disable_URI_cache, start_cluster,
                            field, spec_format, tmp_path):
        """Tests that an actor-level runtime env is GC'd when the actors exit.

        Several actors share the same runtime env.  The env's local files
        must stay in place while at least one of those actors is alive and
        be collected once the last one has been killed.
        """
        cluster, address = start_cluster

        ray.init(address)

        runtime_env = generate_runtime_env_dict(field, spec_format, tmp_path)

        @ray.remote
        class A:
            def test_import(self):
                import pip_install_test  # noqa: F401

                return True

        NUM_ACTORS = 5
        actors = [
            A.options(runtime_env=runtime_env).remote()
            for _ in range(NUM_ACTORS)
        ]
        ray.get([actor.test_import.remote() for actor in actors])

        # Iterate over the handles themselves rather than a hard-coded count
        # so the loop stays correct if NUM_ACTORS is changed.
        for actor in actors:
            # Before each kill at least one actor still uses the env, so the
            # local files must not have been collected yet.
            assert not check_local_files_gced(cluster)
            ray.kill(actor)
        wait_for_condition(lambda: check_local_files_gced(cluster))
def test_task_level_gc(ray_start_cluster, field, spec_format, tmp_path):
    """Verify task-level runtime env files are GC'd once their workers exit.

    The expected outcome depends on the first node's
    ``num_workers_soft_limit`` system config: when it is 0 the files are
    expected to be collected after each task/actor finishes; otherwise they
    are expected to remain.
    """
    cluster = ray_start_cluster

    system_config = cluster.list_all_nodes()[0]._ray_params._system_config
    soft_limit_zero = (
        "num_workers_soft_limit" in system_config
        and system_config["num_workers_soft_limit"] == 0
    )

    runtime_env = generate_runtime_env_dict(field, spec_format, tmp_path)

    def check_gc_after_work_finished():
        """Assert the GC state expected once a task or actor is done."""
        if not soft_limit_zero:
            # The soft limit is large enough, so the local files should
            # not be collected.
            assert not check_local_files_gced(cluster)
            return
        # Wait for the worker to exit and the local files to be collected.
        wait_for_condition(lambda: check_local_files_gced(cluster))

    @ray.remote
    def f():
        import pip_install_test  # noqa: F401

        return True

    @ray.remote
    class A:
        def test_import(self):
            import pip_install_test  # noqa: F401

            return True

    # Run a task with the runtime env.
    ray.get(f.options(runtime_env=runtime_env).remote())
    check_gc_after_work_finished()

    # Start an actor with the runtime env.
    actor = A.options(runtime_env=runtime_env).remote()
    ray.get(actor.test_import.remote())
    # While the actor is alive the local files must not be collected.
    assert not check_local_files_gced(cluster)

    # Kill the actor.
    ray.kill(actor)
    check_gc_after_work_finished()

    # Run a task with the runtime env once more.
    ray.get(f.options(runtime_env=runtime_env).remote())
    check_gc_after_work_finished()
def test_install_failure_logging(
    start_cluster,
    specify_env_in_init,
    field,
    spec_format,
    tmp_path,
):
    """Tests that runtime env install failures name the offending package.

    A distinct nonexistent package is requested for each scope ("init",
    "actor", "task") so that the error raised for one scope can be told
    apart from the others.

    When ``specify_env_in_init`` is set, only the job-level env passed to
    ``ray.init`` is exercised: with an address starting with ``ray://`` the
    connection itself is expected to fail, otherwise the failure is expected
    when the first task is run.  When it is not set, the actor-level and
    task-level envs are exercised instead.
    """
    cluster, address = start_cluster
    using_ray_client = address.startswith("ray://")

    bad_envs: Dict[str, Dict] = {}
    bad_packages: Dict[str, str] = {}
    for scope in ("init", "actor", "task"):
        bad_packages[scope] = "doesnotexist" + scope
        bad_envs[scope] = generate_runtime_env_dict(
            field, spec_format, tmp_path, pip_list=[bad_packages[scope]])

    if specify_env_in_init:
        if using_ray_client:
            with pytest.raises(ConnectionAbortedError) as excinfo:
                ray.init(address, runtime_env=bad_envs["init"])
            # This check must live outside the ``with`` block: statements
            # placed after the raising call inside the block never run.
            assert bad_packages["init"] in str(excinfo.value)
        else:
            ray.init(address, runtime_env=bad_envs["init"])

            @ray.remote
            def g():
                pass

            with pytest.raises(RuntimeEnvSetupError,
                               match=bad_packages["init"]):
                ray.get(g.remote())
        return

    ray.init(address)

    @ray.remote(runtime_env=bad_envs["actor"])
    class A:
        def f(self):
            pass

    a = A.remote()  # noqa
    with pytest.raises(RuntimeEnvSetupError, match=bad_packages["actor"]):
        ray.get(a.f.remote())

    @ray.remote(runtime_env=bad_envs["task"])
    def f():
        pass

    with pytest.raises(RuntimeEnvSetupError, match=bad_packages["task"]):
        ray.get(f.remote())
# Example #6
# 0
    def test_skip_local_gc_env_var(self, skip_local_gc, start_cluster, field,
                                   tmp_path):
        """Verify local runtime env files remain after the job disconnects.

        NOTE(review): the ``skip_local_gc`` fixture presumably sets the
        environment variable that disables local GC -- confirm against the
        fixture definition.
        """
        cluster, address = start_cluster
        ray.init(
            address,
            namespace="test",
            runtime_env=generate_runtime_env_dict(
                field, "python_object", tmp_path),
        )

        @ray.remote
        def f():
            import pip_install_test  # noqa: F401
            return True

        import_ok = ray.get(f.remote())
        assert import_ok

        ray.shutdown()

        # Leave enough time for a conda env to potentially be uninstalled.
        time.sleep(10)

        # Nothing should have been garbage collected.
        files_gced = check_local_files_gced(cluster)
        assert not files_gced