def test_simple_adder(serve_instance):
    ModelWrapperDeployment.options(name="Adder").deploy(
        predictor_cls=AdderPredictor,
        checkpoint=Checkpoint.from_dict({"increment": 2}),
    )
    resp = ray.get(send_request.remote(json={"array": [40]}))
    assert resp == {"value": [42], "batch_size": 1}


def test_mixed_input_output_type_with_batching(serve_instance):
    ModelWrapperDeployment.options(name="Adder").deploy(
        predictor_cls=TakeArrayReturnDataFramePredictor,
        checkpoint=Checkpoint.from_dict({"increment": 2}),
        batching_params=dict(max_batch_size=2, batch_wait_timeout_s=1000),
    )
    refs = [send_request.remote(json={"array": [40, 45]}) for _ in range(2)]
    for resp in ray.get(refs):
        assert resp == [{"col_a": 42.0, "col_b": 47.0}]


def test_batching(serve_instance):
    ModelWrapperDeployment.options(name="Adder").deploy(
        predictor_cls=AdderPredictor,
        checkpoint=Checkpoint.from_dict({"increment": 2}),
        batching_params=dict(max_batch_size=2, batch_wait_timeout_s=1000),
    )
    refs = [send_request.remote(json={"array": [40]}) for _ in range(2)]
    for resp in ray.get(refs):
        assert resp == {"value": [42], "batch_size": 2}
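

# The tests above rely on a `send_request` helper that is not shown in this
# section. A minimal sketch, assuming the deployment is served under the
# default route derived from its name ("/Adder"); the helper name, route, and
# port are assumptions:
import ray
import requests


@ray.remote
def send_request(**requests_kwargs):
    # Post to the deployed endpoint and return the parsed JSON body.
    return requests.post("http://localhost:8000/Adder/", **requests_kwargs).json()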


def test_model_wrappers_in_pipeline(serve_instance):
    _, path = tempfile.mkstemp()
    with open(path, "w") as f:
        json.dump(2, f)

    predictor_cls = "ray.serve.tests.test_model_wrappers.AdderPredictor"
    checkpoint_cls = "ray.serve.tests.test_model_wrappers.AdderCheckpoint"

    with InputNode() as dag_input:
        m1 = ModelWrapperDeployment.bind(
            predictor_cls=predictor_cls,  # TODO: can't be the raw class right now?
            checkpoint={  # TODO: can't be the raw object right now?
                "checkpoint_cls": checkpoint_cls,
                "uri": path,
            },
        )
        dag = m1.predict.bind(dag_input)
    deployments = build(Ingress.bind(dag))
    for d in deployments:
        d.deploy()

    resp = requests.post("http://127.0.0.1:8000/ingress", json={"array": [40]})
    print(resp.text)
    resp.raise_for_status()
    assert resp.json() == {"value": [42], "batch_size": 1}
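

# `Ingress` is not defined in this section. A minimal sketch of a driver
# deployment with the shape the pipeline tests assume: it parses the HTTP
# JSON body and feeds it through the bound DAG handle.
from starlette.requests import Request

from ray import serve


@serve.deployment
class Ingress:
    def __init__(self, dag):
        self.dag = dag

    async def __call__(self, request: Request):
        # Run the parsed request body through the DAG and await the result.
        return await self.dag.remote(await request.json())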
def serve_rl_model(checkpoint: Checkpoint, name="RLModel") -> str: """Serve a RL model and return deployment URI. This function will start Ray Serve and deploy a model wrapper that loads the RL checkpoint into a RLPredictor. """ serve.start(detached=True) deployment = ModelWrapperDeployment.options(name=name) deployment.deploy(RLPredictor, checkpoint) return deployment.url


def test_model_wrappers_in_pipeline_with_uri(serve_instance):
    path = tempfile.mkdtemp()
    uri = f"file://{path}/test_uri"
    Checkpoint.from_dict({"increment": 2}).to_uri(uri)

    predictor_cls = "ray.serve.tests.test_model_wrappers.AdderPredictor"

    with InputNode() as dag_input:
        m1 = ModelWrapperDeployment.bind(
            predictor_cls=predictor_cls,
            checkpoint=uri,
        )
        dag = m1.predict.bind(dag_input)
    deployments = build(Ingress.bind(dag))
    for d in deployments:
        d.deploy()

    resp = requests.post("http://127.0.0.1:8000/ingress", json={"array": [40]})
    print(resp.text)
    resp.raise_for_status()
    assert resp.json() == {"value": [42], "batch_size": 1}


# __air_deploy_start__
import pandas as pd
from fastapi import Request

from ray import serve
from ray.serve.model_wrappers import ModelWrapperDeployment


async def adapter(request: Request):
    # Custom HTTP adapter: turn the JSON request body into a DataFrame.
    content = await request.json()
    print(content)
    return pd.DataFrame.from_dict(content)


serve.start(detached=True)
deployment = ModelWrapperDeployment.options(name="XGBoostService")
deployment.deploy(
    XGBoostPredictor, result.checkpoint, batching_params=False, http_adapter=adapter
)
print(deployment.url)
# __air_deploy_end__

# __air_inference_start__
import requests

# `XGBoostPredictor`, `result`, and `test_dataset` are assumed to come from
# the preceding training example.
sample_input = test_dataset.take(1)
sample_input = dict(sample_input[0])
output = requests.post(deployment.url, json=[sample_input]).json()
# __air_inference_end__
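

# A hedged follow-up to the inference snippet above: `output` is the parsed
# JSON prediction for the single row that was posted. Tear the service down
# once finished.
print(output)
serve.shutdown()  # stops the detached Serve instance started above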