def test_shared_deployment_handle(serve_instance):
    """A single deployment handle bound in several places of the DAG must
    still parse into exactly one deployment per class (no duplicates)."""
    dag, input_node = get_shared_deployment_handle_dag()
    root = dag.apply_recursive(transform_ray_dag_to_serve_dag)
    print(f"Serve DAG: \n{root}")

    extracted = extract_deployments_from_serve_dag(root)
    # Shared handle: two deployments total, not one per usage site.
    assert len(extracted) == 2
    for d in extracted:
        d.deploy()

    ingress = get_ingress_deployment(root, input_node)
    ingress.deploy()

    # Python-level call path agrees with the original Ray DAG.
    _validate_consistent_python_output(
        extracted[1], dag, "Combine", input=1, output=4
    )
    # HTTP ingress path returns the same result, repeatedly.
    for _ in range(5):
        assert requests.get("http://127.0.0.1:8000/ingress", data="1").text == "4"
def test_multi_instantiation_class_deployment_in_init_args(serve_instance):
    """Deployments passed as init_arg / init_kwarg — with the same class
    instantiated more than once — must each be replaced by their own
    deployment handle and parsed into distinct deployment instances."""
    dag, input_node = get_multi_instantiation_class_deployment_in_init_args_dag()
    root = dag.apply_recursive(transform_ray_dag_to_serve_dag)
    print(f"Serve DAG: \n{root}")

    extracted = extract_deployments_from_serve_dag(root)
    # Two Model instances plus the Combine deployment.
    assert len(extracted) == 3
    for d in extracted:
        d.deploy()

    ingress = get_ingress_deployment(root, input_node)
    ingress.deploy()

    # Python-level call path agrees with the original Ray DAG.
    _validate_consistent_python_output(
        extracted[2], dag, "Combine", input=1, output=5
    )
    # HTTP ingress path returns the same result, repeatedly.
    for _ in range(5):
        assert requests.get("http://127.0.0.1:8000/ingress", data="1").text == "5"
def test_func_class_with_class_method_dag(serve_instance):
    """A DAG mixing a plain function node with class-method nodes executes
    identically via Ray DAG, transformed serve DAG, and HTTP ingress."""
    dag, input_node = get_func_class_with_class_method_dag()
    root = dag.apply_recursive(transform_ray_dag_to_serve_dag)

    extracted = extract_deployments_from_serve_dag(root)
    ingress = get_ingress_deployment(root, input_node)
    assert len(extracted) == 2

    for d in extracted:
        d.deploy()
    ingress.deploy()

    # Same answer from the raw Ray DAG and the transformed serve DAG.
    assert ray.get(dag.execute(1, 2, 3)) == 8
    assert ray.get(root.execute(1, 2, 3)) == 8
    # And from the HTTP ingress, repeatedly.
    payload = json.dumps([1, 2, 3])
    for _ in range(5):
        response = requests.get("http://127.0.0.1:8000/ingress", data=payload)
        assert response.text == "8"
def test_simple_single_class(serve_instance):
    """A minimal one-class DAG transforms into a single deployment plus an
    ingress, and serves consistent results via Python and HTTP."""
    dag, input_node = get_simple_class_with_class_method_dag()
    root = dag.apply_recursive(transform_ray_dag_to_serve_dag)

    extracted = extract_deployments_from_serve_dag(root)
    ingress = get_ingress_deployment(root, input_node)
    assert len(extracted) == 1

    extracted[0].deploy()
    ingress.deploy()

    # Python-level call path agrees with the original Ray DAG.
    _validate_consistent_python_output(
        extracted[0], dag, "Model", input=1, output=0.6
    )
    # HTTP ingress path returns the same result, repeatedly.
    for _ in range(5):
        assert requests.get("http://127.0.0.1:8000/ingress", data="1").text == "0.6"
def test_multi_instantiation_class_nested_deployment_arg(serve_instance):
    """Deployment nodes **nested** inside init_arg / init_kwarg structures —
    with the same class instantiated multiple times — must be swapped for
    the correct per-instance deployment handles during parsing."""
    dag, input_node = get_multi_instantiation_class_nested_deployment_arg_dag()
    root = dag.apply_recursive(transform_ray_dag_to_serve_dag)
    print(f"Serve DAG: \n{root}")

    extracted = extract_deployments_from_serve_dag(root)
    assert len(extracted) == 3

    # The Combine deployment received other deployment nodes in its init
    # args/kwargs; those must now be handles pointing at the right names.
    combine = extracted[2]
    positional_handle = combine.init_args[0]
    assert isinstance(positional_handle, RayServeSyncHandle)
    assert positional_handle.deployment_name == "Model"
    nested_handle = combine.init_kwargs["m2"][NESTED_HANDLE_KEY]
    assert isinstance(nested_handle, RayServeSyncHandle)
    assert nested_handle.deployment_name == "Model_1"

    for d in extracted:
        d.deploy()
    ingress = get_ingress_deployment(root, input_node)
    ingress.deploy()

    # Python-level call path agrees with the original Ray DAG.
    _validate_consistent_python_output(
        extracted[2], dag, "Combine", input=1, output=5
    )
    # HTTP ingress path returns the same result, repeatedly.
    for _ in range(5):
        assert requests.get("http://127.0.0.1:8000/ingress", data="1").text == "5"
def build(ray_dag_root_node: DAGNode):
    """Do all the DAG transformation, extraction and generation needed to
    produce a runnable and deployable serve pipeline application from a valid
    DAG authored with Ray DAG API.

    This should be the only user facing API that user interacts with.

    Assumptions:
        Following enforcements are only applied at generating and applying
        pipeline artifact, but not blockers for local development and testing.

        - ALL args and kwargs used in DAG building should be JSON serializable.
          This means in order to ensure your pipeline application can run on
          a remote cluster potentially with different runtime environment,
          among all options listed:

            1) binding in-memory objects
            2) Rely on pickling
            3) Enforce JSON serialibility on all args used

          We believe both 1) & 2) rely on unstable in-memory objects or
          cross version pickling / closure capture, where JSON serialization
          provides the right contract needed for proper deployment.

        - ALL classes and methods used should be visible on top of the file
          and importable via a fully qualified name. Thus no inline class or
          function definitions should be used.

    Args:
        ray_dag_root_node: DAGNode acting as root of a Ray authored DAG. It
            should be executable via `ray_dag_root_node.execute(user_input)`
            and should have `PipelineInputNode` in it.

    Returns:
        deployments: A list of all serve deployments extracted from the
            transformed DAG, with the generated ingress deployment inserted
            at index 0. NOTE(review): a future version is intended to return
            an Application object instead (see TODOs below); until then
            callers receive this plain list.

    Examples:
        >>> with ServeInputNode(preprocessor=request_to_data_int) as dag_input:
        ...     m1 = Model.bind(1)
        ...     m2 = Model.bind(2)
        ...     m1_output = m1.forward.bind(dag_input[0])
        ...     m2_output = m2.forward.bind(dag_input[1])
        ...     ray_dag = ensemble.bind(m1_output, m2_output)

        Assuming we have non-JSON serializable or inline defined class or
        function in local pipeline development.

        >>> app = serve.pipeline.build(ray_dag) # This works
        >>> handle = app.deploy()
        >>> # This also works, we're simply executing the transformed serve_dag.
        >>> ray.get(handle.remote(data))
        >>> # This will fail where enforcements are applied.
        >>> deployment_yaml = app.to_yaml()
    """
    # Rewrite every Ray DAG node into its serve equivalent (bottom-up).
    serve_root_dag = ray_dag_root_node.apply_recursive(transform_ray_dag_to_serve_dag)
    # Collect the individual deployments produced by the transformation.
    deployments = extract_deployments_from_serve_dag(serve_root_dag)
    # Locate the pipeline's input node so the HTTP ingress can be generated.
    pipeline_input_node = get_pipeline_input_node(serve_root_dag)
    ingress_deployment = get_ingress_deployment(serve_root_dag, pipeline_input_node)
    # Ingress goes first so callers can find it deterministically at index 0.
    deployments.insert(0, ingress_deployment)
    # TODO (jiaodong): Call into Application once Shreyas' PR is merged
    # TODO (jiaodong): Apply enforcements at serve app to_yaml level
    return deployments