def test_failed_job_status(
    ray_start_with_dashboard, disable_aiohttp_cache, enable_test_module
):
    """Submit a job whose entrypoint exits with code 1 and expect FAILED.

    The entrypoint starts Ray, sleeps for five seconds and then calls
    ``sys.exit(1)``. The snapshot returned by ``_get_snapshot`` is polled
    until the first job entry that carries a status reports ``"FAILED"``.
    """
    webui_url = ray_start_with_dashboard["webui_url"]
    assert wait_until_server_available(webui_url)
    address = format_web_url(webui_url)

    # Statements executed by the inline ``python -c`` script, in order.
    script_statements = [
        "import ray;",
        "ray.init();",
        "import time;",
        "time.sleep(5);",
        "import sys;",
        "sys.exit(1);",
    ]
    entrypoint_cmd = 'python -c"' + "".join(script_statements) + '"'

    submission_client = JobSubmissionClient(address)
    job_id = submission_client.submit_job(entrypoint=entrypoint_cmd)

    def job_has_failed():
        """Return True once the first job entry with a status is FAILED."""
        jobs = _get_snapshot(address)["data"]["snapshot"]["jobs"]
        # Only the first entry that has a status is inspected; entries
        # whose status is None are skipped.
        entry = next(
            (
                candidate
                for candidate in jobs.values()
                if candidate["status"] is not None
            ),
            None,
        )
        if entry is None:
            return False

        assert entry["config"]["metadata"]["jobSubmissionId"] == job_id
        assert entry["status"] in {"PENDING", "RUNNING", "FAILED"}
        assert entry["statusMessage"] is not None
        return entry["status"] == "FAILED"

    wait_for_condition(job_has_failed, timeout=30)
def test_temporary_uri_reference(monkeypatch, expiration_s):
    """Verify temporary GCS URI references are removed after expiration_s."""
    monkeypatch.setenv(
        "RAY_RUNTIME_ENV_TEMPORARY_REFERENCE_EXPIRATION_S", str(expiration_s)
    )

    # A shared-runtime fixture is not usable here: the expiration env var
    # has to be in place before Ray is started.
    with _ray_start(include_dashboard=True, num_cpus=1) as ctx:
        webui_url = ctx.address_info["webui_url"]
        assert wait_until_server_available(webui_url)

        request_headers = {
            "Connection": "keep-alive",
            "Authorization": "TOK:<MY_TOKEN>",
        }
        client = JobSubmissionClient(
            format_web_url(webui_url), headers=request_headers
        )

        with tempfile.TemporaryDirectory() as tmp_dir:
            # Populate the working directory with a single small file.
            (Path(tmp_dir) / "hi.txt").write_text("hi\n")

            start = time.time()

            def _elapsed():
                """Seconds since just before the job was submitted."""
                return time.time() - start

            client.submit_job(
                entrypoint="echo hi", runtime_env={"working_dir": tmp_dir}
            )

            # Leave room for the deletion to happen when expiration_s is 0.
            time.sleep(2)
            # NOTE(review): no explicit ray.init(address="auto") is issued
            # before the internal_kv checks -- presumably _ray_start already
            # connects this process; confirm.

            print("Starting Internal KV checks at time ", _elapsed())
            if expiration_s > 0:
                max_wait_s = 2 * expiration_s
                # The reference must still be present before it expires...
                assert not check_internal_kv_gced()
                wait_for_condition(check_internal_kv_gced, timeout=max_wait_s)
                # ...and must be collected between expiration_s and twice
                # that, measured from submission time.
                assert expiration_s < _elapsed() < max_wait_s
            else:
                wait_for_condition(check_internal_kv_gced)
            print("Internal KV was GC'ed at time ", _elapsed())