Example #1
0
def test_failed_job_status(ray_start_with_dashboard, disable_aiohttp_cache,
                           enable_test_module):
    """Check that a job whose entrypoint exits with code 1 becomes FAILED.

    Submits a small script that starts Ray, sleeps for five seconds and
    then calls ``sys.exit(1)``, and polls ``_get_snapshot`` until the job
    entry reports the ``FAILED`` status.
    """
    webui_url = ray_start_with_dashboard["webui_url"]
    assert wait_until_server_available(webui_url)
    address = format_web_url(webui_url)

    # Every statement is terminated by ";" and the whole script is wrapped
    # in double quotes directly after "-c".
    script_statements = [
        "import ray",
        "ray.init()",
        "import time",
        "time.sleep(5)",
        "import sys",
        "sys.exit(1)",
    ]
    script_body = "".join(stmt + ";" for stmt in script_statements)
    entrypoint_cmd = "python -c\"" + script_body + "\""

    submission_client = JobSubmissionClient(address)
    job_id = submission_client.submit_job(entrypoint=entrypoint_cmd)

    def job_reported_failed():
        """Return True once the first job with a status shows FAILED."""
        snapshot = _get_snapshot(address)
        jobs = snapshot["data"]["snapshot"]["jobs"]
        for entry in jobs.values():
            # Entries without a status are skipped.
            if entry["status"] is None:
                continue
            submission_id = entry["config"]["metadata"]["jobSubmissionId"]
            assert submission_id == job_id
            assert entry["status"] in {"PENDING", "RUNNING", "FAILED"}
            assert entry["statusMessage"] is not None
            # Only the first entry that has a status decides the result.
            return entry["status"] == "FAILED"

        return False

    wait_for_condition(job_reported_failed, timeout=30)
Example #2
0
def test_temporary_uri_reference(monkeypatch, expiration_s):
    """Test that temporary GCS URI references are deleted after expiration_s.

    A job is submitted with a temporary ``working_dir``; the test then uses
    ``check_internal_kv_gced`` to verify when the reference is collected
    relative to ``expiration_s``.
    """
    # The expiration env var must be set before Ray starts, so a fixture
    # with a shared Ray runtime cannot be used here.
    monkeypatch.setenv("RAY_RUNTIME_ENV_TEMPORARY_REFERENCE_EXPIRATION_S",
                       str(expiration_s))
    with _ray_start(include_dashboard=True, num_cpus=1) as ctx:
        webui_url = ctx.address_info["webui_url"]
        request_headers = {
            "Connection": "keep-alive",
            "Authorization": "TOK:<MY_TOKEN>",
        }
        assert wait_until_server_available(webui_url)
        client = JobSubmissionClient(
            format_web_url(webui_url), headers=request_headers)
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Put one small file in the directory used as working_dir.
            (Path(tmp_dir) / "hi.txt").write_text("hi\n")

            start = time.time()

            job_runtime_env = {"working_dir": tmp_dir}
            client.submit_job(entrypoint="echo hi",
                              runtime_env=job_runtime_env)

            # Give time for deletion to occur if expiration_s is 0.
            time.sleep(2)
            # NOTE(review): an explicit ray.init(address="auto") used to sit
            # here (disabled) to reach internal_kv; presumably _ray_start
            # already provides the connection -- confirm.

            print("Starting Internal KV checks at time ", time.time() - start)
            if expiration_s > 0:
                # The reference must still exist before the expiration ...
                assert not check_internal_kv_gced()
                wait_for_condition(check_internal_kv_gced,
                                   timeout=2 * expiration_s)
                # ... and be collected inside (expiration_s, 2*expiration_s).
                elapsed = time.time() - start
                assert expiration_s < elapsed < 2 * expiration_s
            else:
                wait_for_condition(check_internal_kv_gced)
            print("Internal KV was GC'ed at time ", time.time() - start)