예제 #1
0
파일: execution.py 프로젝트: ckw017/ray
def run(entry_workflow: Workflow,
        storage: Optional[Union[str, Storage]] = None,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Commit the entry workflow and hand it to the management actor.

    See "api.run()" for details.

    Args:
        entry_workflow: The workflow whose step is committed and executed.
        storage: ``None`` to keep the current global storage, a string that
            is passed to ``create_storage``, or a ``Storage`` instance that
            is installed as the global storage directly.
        workflow_id: ID of the workflow job. When ``None``, one is generated
            from the entry workflow ID and the current time.

    Returns:
        The result of ``flatten_workflow_output`` applied to the output
        obtained from the management actor.

    Raises:
        TypeError: If ``storage`` is not ``None``, ``str`` or ``Storage``.
    """
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        timestamp = f"{time.time():.9f}"
        workflow_id = f"{entry_workflow.id}.{timestamp}"

    # Install the requested storage (if any) as the global storage.
    if storage is not None:
        if isinstance(storage, str):
            set_global_storage(create_storage(storage))
        elif isinstance(storage, Storage):
            set_global_storage(storage)
        else:
            raise TypeError(
                "'storage' should be None, str, or Storage type.")

    storage_url = get_global_storage().storage_url
    logger.info(f'Workflow job created. [id="{workflow_id}", storage_url='
                f'"{storage_url}"].')

    try:
        workflow_context.init_workflow_step_context(workflow_id, storage_url)
        commit_step(entry_workflow)
        try:
            manager = ray.get_actor(MANAGEMENT_ACTOR_NAME)
        except ValueError:
            # The named actor does not exist yet, so create it as a detached
            # actor.
            actor_cls = WorkflowManagementActor.options(
                name=MANAGEMENT_ACTOR_NAME, lifetime="detached")
            manager = actor_cls.remote()
        # NOTE: It is important to 'ray.get' the returned output. This
        # ensures caller of 'run()' holds the reference to the workflow
        # result. Otherwise if the actor removes the reference of the
        # workflow output, the caller may fail to resolve the result.
        pending = manager.run_or_resume.remote(workflow_id, storage_url)
        output = ray.get(pending)
        return flatten_workflow_output(workflow_id, output)
    finally:
        # Always clear the step context, whether or not the run succeeded.
        workflow_context.set_workflow_step_context(None)
예제 #2
0
def run(entry_workflow: Workflow,
        storage: Optional[Union[str, Storage]] = None,
        workflow_id: Optional[str] = None) -> ray.ObjectRef:
    """Commit the entry workflow and start it through the recovery path.

    See "api.run()" for details.

    Args:
        entry_workflow: The workflow whose step is committed and executed.
        storage: ``None`` to keep the current global storage, a string that
            is passed to ``create_storage``, or a ``Storage`` instance that
            is installed as the global storage directly.
        workflow_id: ID of the workflow job. When ``None``, one is generated
            from the entry workflow ID and the current time.

    Returns:
        Whatever ``recovery.resume_workflow_job`` returns for this job.

    Raises:
        TypeError: If ``storage`` is not ``None``, ``str`` or ``Storage``.
    """
    if workflow_id is None:
        # Workflow ID format: {Entry workflow UUID}.{Unix time to nanoseconds}
        timestamp = f"{time.time():.9f}"
        workflow_id = f"{entry_workflow.id}.{timestamp}"

    # Install the requested storage (if any) as the global storage.
    if storage is not None:
        if isinstance(storage, str):
            set_global_storage(create_storage(storage))
        elif isinstance(storage, Storage):
            set_global_storage(storage)
        else:
            raise TypeError(
                "'storage' should be None, str, or Storage type.")

    storage_url = get_global_storage().storage_url
    logger.info(f'Workflow job created. [id="{workflow_id}", storage_url='
                f'"{storage_url}"].')

    try:
        workflow_context.init_workflow_step_context(workflow_id, storage_url)
        commit_step(entry_workflow)
        # TODO(suquark): Move this to a detached named actor,
        # so the workflow shares fate with the actor.
        # The current plan is resuming the workflow on the detached named
        # actor. This is extremely simple to implement, but I am not sure
        # of its performance.
        job_output = recovery.resume_workflow_job(workflow_id,
                                                  get_global_storage())
        logger.info(f"Workflow job {workflow_id} started.")
        return job_output
    finally:
        # Always clear the step context, whether or not the start succeeded.
        workflow_context.set_workflow_step_context(None)
예제 #3
0
def update_workflow_step_context(context: Optional[WorkflowStepContext],
                                 step_id: str):
    """Install ``context`` as the module-level step context and enter a step.

    The given context replaces the module-level ``_context``, ``step_id`` is
    appended to its ``workflow_scope``, and a storage created from the
    context's ``storage_url`` becomes the global storage.

    NOTE(review): the annotation allows ``None``, but the body dereferences
    the context unconditionally -- confirm whether ``None`` is ever passed.

    Args:
        context: The step context to install.
        step_id: ID of the step appended to the context's workflow scope.
    """
    global _context
    _context = context
    context.workflow_scope.append(step_id)
    # Imported here to avoid a cyclic import.
    from ray.experimental.workflow import storage
    # TODO(suquark): [optimization] if the original storage has the same URL,
    # skip creating the new one
    step_storage = storage.create_storage(context.storage_url)
    storage.set_global_storage(step_storage)
예제 #4
0
def s3_storage(aws_credentials, s3_server):
    """Yield a global workflow storage backed by a mocked S3 bucket.

    Creates the ``test_bucket`` bucket against ``s3_server`` inside a
    ``mock_s3()`` context, installs a storage built from the matching
    ``s3://`` URL as the global storage, and yields that storage.

    Args:
        aws_credentials: Not referenced in the body; presumably a fixture
            dependency that sets up credentials -- confirm against the
            fixture definition.
        s3_server: Endpoint URL of the S3 server to talk to.
    """
    region = "us-west-2"
    with mock_s3():
        s3 = boto3.client("s3", region_name=region, endpoint_url=s3_server)
        s3.create_bucket(Bucket="test_bucket")
        storage_url = (
            "s3://test_bucket/workflow?region_name=us-west-2"
            f"&endpoint_url={s3_server}")
        storage.set_global_storage(storage.create_storage(storage_url))
        yield storage.get_global_storage()
예제 #5
0
def _workflow_start(storage_url, shared, **kwargs):
    """Start Ray, initialize workflow storage, and yield the address info.

    This is a generator: the code before the ``yield`` is setup and the
    ``finally`` clause is teardown. The teardown is guarded by
    ``try``/``finally`` so that Ray is shut down and the global workflow
    storage is reset even if ``workflow.init`` fails or an exception is
    thrown into the generator at the ``yield``.

    Args:
        storage_url: Storage URL forwarded to ``workflow.init``.
        shared: Not used in this function body.
        **kwargs: Overrides merged on top of the default fixture kwargs
            before they are passed to ``ray.init``.

    Yields:
        The value returned by ``ray.init``.
    """
    init_kwargs = get_default_fixture_ray_kwargs()
    init_kwargs.update(kwargs)
    # Sometimes pytest does not cleanup all global variables.
    # we have to manually reset the workflow storage. This
    # should not be an issue for normal use cases, because global variables
    # are freed after the driver exits.
    if ray.is_initialized():
        ray.shutdown()
        storage.set_global_storage(None)
    address_info = ray.init(**init_kwargs)
    try:
        workflow.init(storage_url)
        yield address_info
    finally:
        # Teardown: runs on normal completion and on errors alike.
        ray.shutdown()
        storage.set_global_storage(None)
예제 #6
0
def init(storage: "Optional[Union[str, Storage]]" = None) -> None:
    """Initialize workflow.

    Args:
        storage: The external storage URL or a custom storage class. If not
            specified, the ``RAY_WORKFLOW_STORAGE`` environment variable is
            consulted; if that is unset as well,
            ``/tmp/ray/workflow_data`` will be used.

    Raises:
        TypeError: If ``storage`` is neither ``None``, a ``str`` nor a
            ``Storage`` instance.
        RuntimeError: If a global storage is already set and its URL differs
            from the URL of the requested storage.
    """
    # Fallback chain: explicit argument -> environment variable -> default.
    if storage is None:
        storage = os.environ.get("RAY_WORKFLOW_STORAGE")
    if storage is None:
        # We should use get_temp_dir_path, but for ray client, we don't
        # have this one. We need a flag to tell whether it's a client
        # or a driver to use the right dir.
        # For now, just use /tmp/ray/workflow_data
        logger.warning(
            "Using default local dir: `/tmp/ray/workflow_data`. "
            "This should only be used for testing purposes.")
        storage = "file:///tmp/ray/workflow_data"

    if not isinstance(storage, (str, Storage)):
        raise TypeError("'storage' should be None, str, or Storage type.")
    if isinstance(storage, str):
        storage = storage_base.create_storage(storage)

    try:
        existing = storage_base.get_global_storage()
    except RuntimeError:
        # No usable global storage yet; nothing to compare against.
        pass
    else:
        # The comparison lives in the 'else' branch so that the RuntimeError
        # raised below is not swallowed by the 'except' clause above.
        same_url = existing.storage_url == storage.storage_url
        if not same_url:
            raise RuntimeError("Calling 'workflow.init()' again with a "
                               "different storage")
        logger.warning("Calling 'workflow.init()' again with the same "
                       "storage.")
    storage_base.set_global_storage(storage)
    workflow_access.init_management_actor()
예제 #7
0
def filesystem_storage(tmp_path):
    """Yield a global workflow storage rooted under ``tmp_path``.

    A storage is created for ``<tmp_path>/workflow_data``, installed as the
    global storage, and then the global storage is yielded.

    Args:
        tmp_path: Directory under which the ``workflow_data`` location is
            placed.
    """
    data_dir = str(tmp_path) + "/workflow_data"
    local_storage = storage.create_storage(data_dir)
    storage.set_global_storage(local_storage)
    yield storage.get_global_storage()
예제 #8
0
def init_workflow_step_context(workflow_id, workflow_root_dir) -> None:
    """Create a fresh module-level step context for ``workflow_id``.

    Args:
        workflow_id: ID of the workflow; must not be ``None``.
        workflow_root_dir: When not ``None``, it is first passed to
            ``storage.set_global_storage``. It is also handed to the new
            ``WorkflowStepContext``.
    """
    global _context
    has_root_dir = workflow_root_dir is not None
    if has_root_dir:
        storage.set_global_storage(workflow_root_dir)
    # NOTE: this check runs after the global storage has been updated.
    assert workflow_id is not None
    fresh_context = WorkflowStepContext(workflow_id, workflow_root_dir)
    _context = fresh_context
예제 #9
0
def _alter_storage(new_storage):
    """Switch the workflow storage to ``new_storage`` and restart Ray.

    The new storage is installed as the global storage, Ray is shut down in
    this process, ``ray stop --force`` is run through the shell, and the
    workflow is re-initialized with ``new_storage``.

    Args:
        new_storage: The storage passed to ``set_global_storage`` and to
            ``workflow.init``.
    """
    set_global_storage(new_storage)

    # Tear down Ray before re-initializing against the new storage.
    ray.shutdown()
    stop_command = "ray stop --force"
    os.system(stop_command)

    workflow.init(new_storage)