Esempio n. 1
0
def test_embedded_objectrefs(workflow_start_regular):
    """Check that object refs nested inside a stored object can be reloaded.

    An object holding two ``ray.put`` refs is written with
    ``WorkflowStorage._put``. Ray is then shut down and stopped, the workflow
    module is re-initialized against the same storage URL, and the object is
    read back with ``_get``. The refs of the reloaded object must resolve to
    ``[1, 2]``.
    """
    # Use this test's own name as the workflow id so it does not share a
    # storage namespace with another test.
    workflow_id = test_embedded_objectrefs.__name__

    class ObjectRefsWrapper:
        def __init__(self, refs):
            self.refs = refs

    global_storage = storage.get_global_storage()
    wf_storage = workflow_storage.WorkflowStorage(workflow_id, global_storage)
    # Remember the URL now; it is needed to re-initialize after the restart.
    url = global_storage.storage_url

    wrapped = ObjectRefsWrapper([ray.put(1), ray.put(2)])

    asyncio_run(wf_storage._put(["key"], wrapped))

    # Be extremely explicit about shutting down. We want to make sure the
    # `_get` call deserializes the full object and puts it in the object store.
    # Shutting down the cluster should guarantee we don't accidentally get the
    # old object and pass the test.
    ray.shutdown()
    # An argument list avoids going through a shell; output is still captured
    # and a non-zero exit status still raises.
    subprocess.check_output(["ray", "stop", "--force"])

    workflow.init(url)
    storage2 = get_workflow_storage(workflow_id)

    result = asyncio_run(storage2._get(["key"]))
    assert ray.get(result.refs) == [1, 2]
Esempio n. 2
0
def test_workflow_lifetime_2(call_ray_start):
    """Case 2: the driver is killed before the workflow output is fetched.

    The driver script is launched without blocking, killed after ten
    seconds, and the output of workflow ``"driver_terminated"`` is then
    expected to be ``20``.
    """
    script = driver_script.format(100)
    driver = run_string_as_driver_nonblocking(script)
    time.sleep(10)
    driver.kill()
    time.sleep(1)

    workflow.init()
    result_ref = workflow.get_output("driver_terminated")
    assert ray.get(result_ref) == 20
Esempio n. 3
0
def test_recovery_cluster_failure():
    """Resume workflow ``"cluster_failure"`` after the cluster is stopped.

    A head node is started with ``ray start --head`` and a driver script is
    launched without blocking. After ten seconds the cluster is stopped and
    the driver is killed. The workflow module is then initialized again and
    resuming the workflow must produce ``20``.
    """
    subprocess.check_call(["ray", "start", "--head"])
    time.sleep(1)
    proc = run_string_as_driver_nonblocking(driver_script)
    # Presumably long enough for the driver to get the workflow running
    # before the cluster goes away -- the exact timing is not verified here.
    time.sleep(10)
    subprocess.check_call(["ray", "stop"])
    proc.kill()
    time.sleep(1)
    try:
        workflow.init()
        assert ray.get(workflow.resume("cluster_failure")) == 20
    finally:
        # Run the shutdown even when initialization or the assertion fails,
        # so a failing run does not leave this process connected to Ray.
        ray.shutdown()
Esempio n. 4
0
def test_recovery_cluster_failure(reset_workflow, tmp_path):
    """Resume workflow ``"cluster_failure"`` after the cluster is stopped.

    A head node is started with ``ray start --head`` and a driver script,
    formatted with ``tmp_path``, is launched without blocking. After ten
    seconds the cluster is stopped and the driver is killed. The workflow
    module is then initialized on ``tmp_path`` and resuming the workflow must
    produce ``20``.
    """
    storage_dir = str(tmp_path)
    subprocess.check_call(["ray", "start", "--head"])
    time.sleep(1)
    proc = run_string_as_driver_nonblocking(
        driver_script.format(tmp_path=storage_dir))
    time.sleep(10)
    subprocess.check_call(["ray", "stop"])
    proc.kill()
    time.sleep(1)
    try:
        workflow.init(storage_dir)
        assert ray.get(workflow.resume("cluster_failure")) == 20
    finally:
        # Reset the global storage and shut Ray down even when the
        # assertion fails, so the state does not leak into later tests.
        workflow.storage.set_global_storage(None)
        ray.shutdown()
Esempio n. 5
0
def _workflow_start(storage_url, shared, **kwargs):
    """Initialize Ray and the workflow module, yield, then tear both down.

    Args:
        storage_url: Passed to ``workflow.init``.
        shared: Not used by this function.
        **kwargs: Overrides merged on top of
            ``get_default_fixture_ray_kwargs()`` before calling ``ray.init``.

    Yields:
        The value returned by ``ray.init``.
    """
    init_kwargs = get_default_fixture_ray_kwargs()
    init_kwargs.update(kwargs)
    if ray.is_initialized():
        ray.shutdown()
        # Sometimes pytest does not cleanup all global variables.
        # we have to manually reset the workflow storage. This
        # should not be an issue for normal use cases, because global
        # variables are freed after the driver exits.
        storage.set_global_storage(None)
    address_info = ray.init(**init_kwargs)
    try:
        workflow.init(storage_url)
        yield address_info
    finally:
        # Teardown. Placed in ``finally`` so it also runs when
        # ``workflow.init`` fails or an exception is thrown into the
        # generator at the ``yield``.
        ray.shutdown()
        storage.set_global_storage(None)
Esempio n. 6
0
def test_recovery_cluster_failure_resume_all(reset_workflow, tmp_path):
    """Resume every workflow after the cluster is stopped mid-run.

    The test holds a file lock that the driver's workflow step also needs, so
    the step cannot return while the cluster is stopped and the driver is
    killed. After the lock is released, ``workflow.resume_all()`` must return
    exactly one entry, for workflow ``"cluster_failure"``, whose result is
    ``20``.
    """
    subprocess.check_call(["ray", "start", "--head"])
    time.sleep(1)
    workflow_dir = tmp_path / "workflow"
    lock_file = tmp_path / "lock_file"
    driver_script = f"""
import time
from ray.experimental import workflow
from filelock import FileLock
@workflow.step
def foo(x):
    with FileLock("{str(lock_file)}"):
        return 20

if __name__ == "__main__":
    workflow.init("{str(workflow_dir)}")
    assert foo.step(0).run(workflow_id="cluster_failure") == 20
"""
    # Hold the lock while the driver runs; the context manager releases it
    # even if stopping the cluster or killing the driver raises.
    with FileLock(lock_file):
        proc = run_string_as_driver_nonblocking(driver_script)
        time.sleep(10)
        subprocess.check_call(["ray", "stop"])
        proc.kill()
        time.sleep(1)

    try:
        workflow.init(str(workflow_dir))
        resumed = workflow.resume_all()
        assert len(resumed) == 1
        (wid, obj_ref) = resumed[0]
        assert wid == "cluster_failure"
        assert ray.get(obj_ref) == 20
    finally:
        # Reset the global storage and shut Ray down even when an assertion
        # fails, so the state does not leak into later tests.
        workflow.storage.set_global_storage(None)
        ray.shutdown()
Esempio n. 7
0
def test_resume_different_storage(ray_start_regular, tmp_path, reset_workflow):
    """Run and resume workflow ``"const"`` with ``tmp_path`` as the storage.

    The resumed workflow is expected to produce ``31416``; the global
    workflow storage is reset to ``None`` afterwards.
    """
    storage_dir = str(tmp_path)
    workflow.init(storage=storage_dir)

    constant.step().run(workflow_id="const")
    resumed_ref = workflow.resume(workflow_id="const")
    assert ray.get(resumed_ref) == 31416

    workflow.storage.set_global_storage(None)
Esempio n. 8
0
def test_workflow_lifetime_1(call_ray_start):
    """Case 1: the driver exits normally before the output is fetched.

    The driver script is run with ``run_string_as_driver`` and the output of
    workflow ``"driver_terminated"`` is then expected to be ``20``.
    """
    script = driver_script.format(5)
    run_string_as_driver(script)

    workflow.init()
    result_ref = workflow.get_output("driver_terminated")
    assert ray.get(result_ref) == 20
Esempio n. 9
0
def test_init_twice(call_ray_start, reset_workflow, tmp_path):
    """A second ``workflow.init`` with a storage path must raise.

    After a plain ``workflow.init()``, calling ``workflow.init`` again with
    ``tmp_path`` is expected to raise ``RuntimeError``.
    """
    other_storage = str(tmp_path)
    workflow.init()
    with pytest.raises(RuntimeError):
        workflow.init(other_storage)
Esempio n. 10
0
def test_init_twice_2(call_ray_start, reset_workflow, tmp_path):
    """``workflow.init`` with a storage path must raise after the driver ran.

    The driver script is run to completion first; a following
    ``workflow.init`` with ``tmp_path`` is expected to raise
    ``RuntimeError``.
    """
    other_storage = str(tmp_path)
    run_string_as_driver(driver_script)
    with pytest.raises(RuntimeError):
        workflow.init(other_storage)
Esempio n. 11
0
def test_resume_different_storage(ray_start_regular):
    """Run and resume workflow ``"const"`` with a temporary storage directory.

    The resumed workflow is expected to produce ``31416``.
    """
    # The context manager removes the directory even when the assertion
    # fails, so a failing run does not leave a stray temp directory behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        workflow.init(storage=tmp_dir)
        constant.step().run(workflow_id="const")
        assert ray.get(workflow.resume(workflow_id="const")) == 31416
Esempio n. 12
0
def test_init_twice(tmp_path):
    """A second ``workflow.init`` with a storage path must raise.

    After a plain ``workflow.init()``, calling ``workflow.init`` again with
    ``tmp_path`` is expected to raise ``RuntimeError``.
    """
    other_storage = str(tmp_path)
    workflow.init()
    with pytest.raises(RuntimeError):
        workflow.init(other_storage)
Esempio n. 13
0
def _alter_storage(new_storage):
    """Switch to ``new_storage`` and re-initialize the workflow module on it.

    Sets ``new_storage`` as the global storage, shuts Ray down, runs
    ``ray stop --force`` and finally calls ``workflow.init(new_storage)``.
    """
    set_global_storage(new_storage)

    # Tear down the current session before re-initializing. The exit status
    # of the stop command is not checked.
    ray.shutdown()
    stop_command = "ray stop --force"
    os.system(stop_command)

    workflow.init(new_storage)