Example #1
def test_actor_manager(workflow_start_regular, tmp_path):
    lock_file = tmp_path / "lock"

    @workflow.virtual_actor
    class LockCounter:
        def __init__(self, lck):
            self.counter = 0
            self.lck = lck

        @workflow.virtual_actor.readonly
        def val(self):
            with FileLock(self.lck):
                return self.counter

        def incr(self):
            with FileLock(self.lck):
                self.counter += 1
                return self.counter

        def __getstate__(self):
            return (self.lck, self.counter)

        def __setstate__(self, state):
            self.lck, self.counter = state

    actor = LockCounter.get_or_create("counter", str(lock_file))
    ray.get(actor.ready())

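    # Hold the file lock so that both actor methods below block until it is released.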
    lock = FileLock(lock_file)
    lock.acquire()

    assert [("counter", workflow.SUCCESSFUL)] == workflow.list_all()

    v = actor.val.run_async()
    # A readonly method should not put the workflow into the RUNNING state
    assert [("counter", workflow.SUCCESSFUL)] == workflow.list_all()
    lock.release()
    assert ray.get(v) == 0

    # A writer method should put the workflow into the RUNNING state
    lock.acquire()
    v = actor.incr.run_async()
    time.sleep(2)
    assert [("counter", workflow.RUNNING)] == workflow.list_all()
    lock.release()
    assert ray.get(v) == 1
Example #2
def test_workflow_queuing_1(shutdown_only, tmp_path):
    ray.init(storage=str(tmp_path))
    workflow.init(max_running_workflows=2, max_pending_workflows=2)

    import queue
    import filelock

    lock_path = str(tmp_path / ".lock")

    @ray.remote
    def long_running(x):
        with filelock.FileLock(lock_path):
            return x

    wfs = [long_running.bind(i) for i in range(5)]

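    # While this lock is held, every long_running task blocks: two workflows
    # run, two queue up as pending, and a fifth submission overflows the queue.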
    with filelock.FileLock(lock_path):
        refs = [
            workflow.run_async(wfs[i], workflow_id=f"workflow_{i}")
            for i in range(4)
        ]

        assert sorted(x[0] for x in workflow.list_all({workflow.RUNNING})) == [
            "workflow_0",
            "workflow_1",
        ]
        assert sorted(x[0] for x in workflow.list_all({workflow.PENDING})) == [
            "workflow_2",
            "workflow_3",
        ]

        with pytest.raises(queue.Full, match="Workflow queue has been full"):
            workflow.run(wfs[4], workflow_id="workflow_4")

    assert ray.get(refs) == [0, 1, 2, 3]
    assert workflow.run(wfs[4], workflow_id="workflow_4") == 4
    assert sorted(x[0] for x in workflow.list_all({workflow.SUCCESSFUL})) == [
        "workflow_0",
        "workflow_1",
        "workflow_2",
        "workflow_3",
        "workflow_4",
    ]
    for i in range(5):
        assert workflow.get_output(f"workflow_{i}") == i
Example #3
def test_no_init(shutdown_only):
    @ray.remote
    def f():
        pass

    fail_wf_init_error_msg = re.escape(
        "`workflow.init()` must be called prior to using the workflows API.")

    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        workflow.create(f.bind()).run()
    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        workflow.list_all()
    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        workflow.resume_all()
    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        workflow.cancel("wf")
    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        workflow.get_actor("wf")
Example #4
def test_workflow_queuing_2(shutdown_only, tmp_path):
    ray.init(storage=str(tmp_path))
    workflow.init(max_running_workflows=2, max_pending_workflows=2)

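    # These tasks finish immediately, so the queue drains on its own and never fills up.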
    @ray.remote
    def short_running(x):
        return x

    wfs = [short_running.bind(i) for i in range(5)]
    refs = [
        workflow.run_async(wfs[i], workflow_id=f"workflow_{i}")
        for i in range(4)
    ]
    for i in range(4):
        assert workflow.get_output(f"workflow_{i}") == i
    assert ray.get(refs) == [0, 1, 2, 3]
    assert workflow.run(wfs[4], workflow_id="workflow_4") == 4
    assert sorted(x[0] for x in workflow.list_all({workflow.SUCCESSFUL})) == [
        "workflow_0",
        "workflow_1",
        "workflow_2",
        "workflow_3",
        "workflow_4",
    ]
Example #5
def test_workflow_manager_simple(workflow_start_regular):
    assert [] == workflow.list_all()
    with pytest.raises(workflow.common.WorkflowNotFoundError):
        workflow.get_status("X")
Example #6
def test_workflow_manager(workflow_start_regular, tmp_path):
    # A file lock used to block the tasks until it is released
    tmp_file = str(tmp_path / "lock")
    lock = FileLock(tmp_file)
    lock.acquire()

    # A flag file whose existence makes even-numbered tasks fail
    flag_file = tmp_path / "flag"
    flag_file.touch()

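    # Each task first blocks on the shared file lock; once the lock is released,
    # even-numbered tasks fail as long as the flag file still exists.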
    @ray.remote
    def long_running(i):
        lock = FileLock(tmp_file)
        with lock.acquire():
            pass

        if i % 2 == 0:
            if flag_file.exists():
                raise ValueError()
        return 100

    outputs = [
        workflow.create(long_running.bind(i)).run_async(workflow_id=str(i))
        for i in range(100)
    ]
    # Test list_all(); it should list all running jobs
    all_tasks = workflow.list_all()
    assert len(all_tasks) == 100
    all_tasks_running = workflow.list_all(workflow.RUNNING)
    assert dict(all_tasks) == dict(all_tasks_running)
    assert workflow.get_status("0") == "RUNNING"

    # Release the lock and make sure all tasks finish
    lock.release()
    for o in outputs:
        try:
            r = ray.get(o)
        except Exception:
            continue
        assert 100 == r
    all_tasks_running = workflow.list_all(workflow.WorkflowStatus.RUNNING)
    assert len(all_tasks_running) == 0
    # Half of them failed and half succeeded
    failed_jobs = workflow.list_all("FAILED")
    assert len(failed_jobs) == 50
    finished_jobs = workflow.list_all("SUCCESSFUL")
    assert len(finished_jobs) == 50

    all_tasks_status = workflow.list_all(
        {
            workflow.WorkflowStatus.SUCCESSFUL,
            workflow.WorkflowStatus.FAILED,
            workflow.WorkflowStatus.RUNNING,
        }
    )
    assert len(all_tasks_status) == 100
    assert failed_jobs == [
        (k, v) for (k, v) in all_tasks_status if v == workflow.WorkflowStatus.FAILED
    ]
    assert finished_jobs == [
        (k, v) for (k, v) in all_tasks_status if v == workflow.WorkflowStatus.SUCCESSFUL
    ]

    # Test get_status
    assert workflow.get_status("0") == "FAILED"
    assert workflow.get_status("1") == "SUCCESSFUL"
    lock.acquire()
    r = workflow.resume("0")
    assert workflow.get_status("0") == workflow.RUNNING
    flag_file.unlink()
    lock.release()
    assert 100 == ray.get(r)
    assert workflow.get_status("0") == workflow.SUCCESSFUL

    # Test cancel
    lock.acquire()
    workflow.resume("2")
    assert workflow.get_status("2") == workflow.RUNNING
    workflow.cancel("2")
    assert workflow.get_status("2") == workflow.CANCELED

    # Now resume_all
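    # Of the 50 workflows that originally failed, "0" has since been resumed
    # successfully and "2" was canceled, so 48 failed workflows remain.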
    resumed = workflow.resume_all(include_failed=True)
    assert len(resumed) == 48
    lock.release()
    assert [ray.get(o) for (_, o) in resumed] == [100] * 48
Example #7
def test_workflow_manager_simple(workflow_start_regular):
    assert [] == workflow.list_all()
    with pytest.raises(ValueError):
        workflow.get_status("X")
Example #8
def test_delete(workflow_start_regular):
    from ray._private.storage import _storage_uri

    # Try deleting a random workflow that never existed.
    with pytest.raises(WorkflowNotFoundError):
        workflow.delete(workflow_id="never_existed")

    # Delete a workflow that has not finished and is not running.
    @ray.remote
    def never_ends(x):
        utils.set_global_mark()
        time.sleep(1000000)
        return x

    workflow.create(never_ends.bind("hello world")).run_async("never_finishes")

    # Make sure the step is actually executing before killing the cluster
    while not utils.check_global_mark():
        time.sleep(0.1)

    # Restart
    ray.shutdown()
    subprocess.check_output("ray stop --force", shell=True)
    ray.init(storage=_storage_uri)
    workflow.init()

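    # The unfinished workflow's data is still in storage after the restart, so it
    # can be deleted even though its output cannot be retrieved.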
    with pytest.raises(ray.exceptions.RaySystemError):
        result = workflow.get_output("never_finishes")
        ray.get(result)

    workflow.delete("never_finishes")

    with pytest.raises(ValueError):
        output = workflow.get_output("never_finishes")

    # TODO(Alex): Uncomment after
    # https://github.com/ray-project/ray/issues/19481.
    # with pytest.raises(WorkflowNotFoundError):
    #     workflow.resume("never_finishes")

    with pytest.raises(WorkflowNotFoundError):
        workflow.delete(workflow_id="never_finishes")

    # Delete a workflow which has finished.
    @ray.remote
    def basic_step(arg):
        return arg

    result = workflow.create(
        basic_step.bind("hello world")).run(workflow_id="finishes")
    assert result == "hello world"
    output = workflow.get_output("finishes")
    assert ray.get(output) == "hello world"

    workflow.delete(workflow_id="finishes")

    with pytest.raises(ValueError):
        output = workflow.get_output("finishes")

    # TODO(Alex): Uncomment after
    # https://github.com/ray-project/ray/issues/19481.
    # with pytest.raises(ValueError):
    #     workflow.resume("finishes")

    with pytest.raises(WorkflowNotFoundError):
        workflow.delete(workflow_id="finishes")

    assert workflow.list_all() == []

    # The workflow can be re-run as if it was never run before.
    assert workflow.create(
        basic_step.bind("123")).run(workflow_id="finishes") == "123"
Example #9
def test_no_init_api(shutdown_only):
    workflow.list_all()
Example #10
def test_workflow_manager_simple(workflow_start_regular):
    from ray.workflow.exceptions import WorkflowNotFoundError

    assert [] == workflow.list_all()
    with pytest.raises(WorkflowNotFoundError):
        workflow.get_status("X")
Example #11
def test_workflow_queuing_resume_all(shutdown_only, tmp_path):
    ray.init(storage=str(tmp_path))
    workflow.init(max_running_workflows=2, max_pending_workflows=2)

    import queue
    import filelock

    lock_path = str(tmp_path / ".lock")

    @ray.remote
    def long_running(x):
        with filelock.FileLock(lock_path):
            return x

    wfs = [long_running.bind(i) for i in range(5)]

    with filelock.FileLock(lock_path):
        _refs = [  # noqa: F841
            workflow.run_async(wfs[i], workflow_id=f"workflow_{i}")
            for i in range(4)
        ]

        assert sorted(x[0] for x in workflow.list_all({workflow.RUNNING})) == [
            "workflow_0",
            "workflow_1",
        ]
        assert sorted(x[0] for x in workflow.list_all({workflow.PENDING})) == [
            "workflow_2",
            "workflow_3",
        ]

        with pytest.raises(queue.Full, match="Workflow queue has been full"):
            workflow.run(wfs[4], workflow_id="workflow_4")

        # kill all workflows
        ray.shutdown()
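        # Shutting down Ray here leaves 2 running and 2 pending workflows
        # unfinished in storage; they are resumed after the restart below.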

    ray.init(storage=str(tmp_path))
    workflow.init(max_running_workflows=2, max_pending_workflows=2)

    with filelock.FileLock(lock_path):
        workflow_ids, outputs = zip(*sorted(workflow.resume_all()))
        # We should see the same running and pending workflows, because
        # resume_all() resumes previously running workflows with higher priority.
        assert sorted(x[0] for x in workflow.list_all({workflow.RUNNING})) == [
            "workflow_0",
            "workflow_1",
        ]
        assert sorted(x[0] for x in workflow.list_all({workflow.PENDING})) == [
            "workflow_2",
            "workflow_3",
        ]

    assert workflow_ids == (
        "workflow_0",
        "workflow_1",
        "workflow_2",
        "workflow_3",
    )

    assert ray.get(list(outputs)) == [0, 1, 2, 3]
    assert workflow.run(wfs[4], workflow_id="workflow_4") == 4
    assert sorted(x[0] for x in workflow.list_all({workflow.SUCCESSFUL})) == [
        "workflow_0",
        "workflow_1",
        "workflow_2",
        "workflow_3",
        "workflow_4",
    ]