예제 #1
0
def test_actor_manager(workflow_start_regular, tmp_path):
    lock_file = tmp_path / "lock"

    @workflow.virtual_actor
    class LockCounter:
        def __init__(self, lck):
            self.counter = 0
            self.lck = lck

        @workflow.virtual_actor.readonly
        def val(self):
            with FileLock(self.lck):
                return self.counter

        def __getstate__(self):
            return (self.lck, self.counter)

        def __setstate__(self, state):
            self.lck, self.counter = state

    actor = LockCounter.get_or_create("counter", str(lock_file))
    ray.get(actor.ready())

    lock = FileLock(lock_file)
    lock.acquire()

    assert [("counter", workflow.SUCCESSFUL)] == workflow.list_all()

    actor.val.run_async()
    # Readonly function won't make the workflow running
    assert [("counter", workflow.SUCCESSFUL)] == workflow.list_all()
예제 #2
0
def test_no_init(shutdown_only):
    @workflow.step
    def f():
        pass

    fail_wf_init_error_msg = re.escape(
        "`workflow.init()` must be called prior to using "
        "the workflows API.")

    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        f.step().run()
    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        workflow.list_all()
    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        workflow.resume_all()
    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        workflow.cancel("wf")
    with pytest.raises(RuntimeError, match=fail_wf_init_error_msg):
        workflow.get_actor("wf")
예제 #3
0
def test_workflow_manager_simple(workflow_start_regular):
    assert [] == workflow.list_all()
    with pytest.raises(ValueError):
        workflow.get_status("X")
예제 #4
0
def test_workflow_manager(workflow_start_regular, tmp_path):
    # For sync between jobs
    tmp_file = str(tmp_path / "lock")
    lock = FileLock(tmp_file)
    lock.acquire()

    # For sync between jobs
    flag_file = tmp_path / "flag"
    flag_file.touch()

    @workflow.step
    def long_running(i):
        lock = FileLock(tmp_file)
        with lock.acquire():
            pass

        if i % 2 == 0:
            if flag_file.exists():
                raise ValueError()
        return 100

    outputs = [
        long_running.step(i).run_async(workflow_id=str(i)) for i in range(100)
    ]
    # Test list all, it should list all jobs running
    all_tasks = workflow.list_all()
    assert len(all_tasks) == 100
    all_tasks_running = workflow.list_all(workflow.RUNNING)
    assert dict(all_tasks) == dict(all_tasks_running)
    assert workflow.get_status("0") == "RUNNING"

    # Release lock and make sure all tasks finished
    lock.release()
    for o in outputs:
        try:
            r = ray.get(o)
        except Exception:
            continue
        assert 100 == r
    all_tasks_running = workflow.list_all(workflow.WorkflowStatus.RUNNING)
    assert len(all_tasks_running) == 0
    # Half of them failed and half succeed
    failed_jobs = workflow.list_all("FAILED")
    assert len(failed_jobs) == 50
    finished_jobs = workflow.list_all("SUCCESSFUL")
    assert len(finished_jobs) == 50

    all_tasks_status = workflow.list_all({
        workflow.WorkflowStatus.SUCCESSFUL, workflow.WorkflowStatus.FAILED,
        workflow.WorkflowStatus.RUNNING
    })
    assert len(all_tasks_status) == 100
    assert failed_jobs == [(k, v) for (k, v) in all_tasks_status
                           if v == workflow.WorkflowStatus.FAILED]
    assert finished_jobs == [(k, v) for (k, v) in all_tasks_status
                             if v == workflow.WorkflowStatus.SUCCESSFUL]

    # Test get_status
    assert workflow.get_status("0") == "FAILED"
    assert workflow.get_status("1") == "SUCCESSFUL"
    lock.acquire()
    r = workflow.resume("0")
    assert workflow.get_status("0") == workflow.RUNNING
    flag_file.unlink()
    lock.release()
    assert 100 == ray.get(r)
    assert workflow.get_status("0") == workflow.SUCCESSFUL

    # Test cancel
    lock.acquire()
    workflow.resume("2")
    assert workflow.get_status("2") == workflow.RUNNING
    workflow.cancel("2")
    assert workflow.get_status("2") == workflow.CANCELED

    # Now resume_all
    resumed = workflow.resume_all(include_failed=True)
    assert len(resumed) == 48
    lock.release()
    assert [ray.get(o) for (_, o) in resumed] == [100] * 48