Example #1
import asyncio

import requests
import starlette.responses

from ray import serve


def test_starlette_response(serve_instance):
    def basic_response(_):
        return starlette.responses.Response("Hello, world!",
                                            media_type="text/plain")

    serve.create_backend("basic_response", basic_response)
    serve.create_endpoint("basic_response",
                          backend="basic_response",
                          route="/basic_response")
    assert requests.get(
        "http://127.0.0.1:8000/basic_response").text == "Hello, world!"

    def html_response(_):
        return starlette.responses.HTMLResponse(
            "<html><body><h1>Hello, world!</h1></body></html>")

    serve.create_backend("html_response", html_response)
    serve.create_endpoint("html_response",
                          backend="html_response",
                          route="/html_response")
    assert requests.get(
        "http://127.0.0.1:8000/html_response"
    ).text == "<html><body><h1>Hello, world!</h1></body></html>"

    def plain_text_response(_):
        return starlette.responses.PlainTextResponse("Hello, world!")

    serve.create_backend("plain_text_response", plain_text_response)
    serve.create_endpoint("plain_text_response",
                          backend="plain_text_response",
                          route="/plain_text_response")
    assert requests.get(
        "http://127.0.0.1:8000/plain_text_response").text == "Hello, world!"

    def json_response(_):
        return starlette.responses.JSONResponse({"hello": "world"})

    serve.create_backend("json_response", json_response)
    serve.create_endpoint("json_response",
                          backend="json_response",
                          route="/json_response")
    assert requests.get(
        "http://127.0.0.1:8000/json_response").json()["hello"] == "world"

    def redirect_response(_):
        return starlette.responses.RedirectResponse(
            url="http://127.0.0.1:8000/basic_response")

    serve.create_backend("redirect_response", redirect_response)
    serve.create_endpoint("redirect_response",
                          backend="redirect_response",
                          route="/redirect_response")
    assert requests.get(
        "http://127.0.0.1:8000/redirect_response").text == "Hello, world!"

    def streaming_response(_):
        async def slow_numbers():
            for number in range(1, 4):
                yield str(number)
                await asyncio.sleep(0.01)

        return starlette.responses.StreamingResponse(slow_numbers(),
                                                     media_type="text/plain",
                                                     status_code=418)

    serve.create_backend("streaming_response", streaming_response)
    serve.create_endpoint("streaming_response",
                          backend="streaming_response",
                          route="/streaming_response")
    resp = requests.get("http://127.0.0.1:8000/streaming_response")
    assert resp.text == "123"
    assert resp.status_code == 418
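A side note, not part of the original test: requests follows the redirect automatically, so the hop itself can also be asserted. A minimal sketch, assuming the endpoints above are still running:

resp = requests.get("http://127.0.0.1:8000/redirect_response")
# requests records the intermediate response; Starlette's RedirectResponse
# defaults to 307 in recent versions (older ones used 302).
assert resp.history and resp.history[0].status_code in (302, 307)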
Example #2
def connect_in_backend(_):
    serve.create_backend("backend-ception", connect_in_backend)
Example #3
File: echo.py Project: zhuohan123/ray
"""
Example service that prints out http context.
"""

import time

import requests

from ray import serve
from ray.serve.utils import pformat_color_json


def echo(flask_request):
    return "hello " + flask_request.args.get("name", "serve!")


serve.init()

serve.create_backend("echo:v1", echo)
serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")

while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))

    print("...Sleeping for 2 seconds...")
    time.sleep(2)
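Since echo reads an optional "name" query parameter, a small follow-up (not in the original file) is to set it explicitly:

resp = requests.get("http://127.0.0.1:8000/echo?name=ray").json()
print(resp)  # "hello ray"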
Example #4
            payload["petal length"],
            payload["petal width"],
        ]
        prediction = self.model.predict([input_vector])[0]
        human_name = self.label_list[prediction]
        return {"result": human_name}


# connect to our existing Ray cluster
# (if your redis instance requires a password, pass it to ray.init as well)
ray.init(address="auto")

# now initialize / connect to the Ray Serve instance;
# listen on 0.0.0.0 to make the HTTP server accessible from other machines.
serve.init(http_host="0.0.0.0")
serve.create_backend("lr:v1", BoostingModel)
serve.create_endpoint("iris_classifier", backend="lr:v1", route="/regressor")
# __doc_create_deploy_end__

# __doc_query_begin__
import requests  # noqa: E402

sample_request_input = {
    "sepal length": 1.2,
    "sepal width": 1.0,
    "petal length": 1.1,
    "petal width": 0.9,
}
response = requests.get(
    "http://localhost:8000/regressor", json=sample_request_input)
print(response.text)
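A slightly more defensive variant of the query above (an addition, not from the original doc) fails fast on HTTP errors and reads the "result" field directly:

response = requests.get(
    "http://localhost:8000/regressor", json=sample_request_input)
response.raise_for_status()  # surface HTTP errors early
print(response.json()["result"])  # the predicted iris class name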
Example #5
import time

import ray
import requests

from ray import serve
from ray.serve.utils import pformat_color_json


class MagicCounter:
    def __init__(self, increment):
        self.increment = increment

    def __call__(self, flask_request, base_number=None):
        if serve.context.web:
            base_number = int(flask_request.args.get("base_number", "0"))
        return base_number + self.increment


serve.init()
serve.create_backend("counter:v1", MagicCounter, 42)  # increment=42
serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter")

print("Sending ten queries via HTTP")
for i in range(10):
    url = "http://127.0.0.1:8000/counter?base_number={}".format(i)
    print("> Pinging {}".format(url))
    resp = requests.get(url).json()
    print(pformat_color_json(resp))

    time.sleep(0.2)

print("Sending ten queries via Python")
handle = serve.get_handle("magic_counter")
for i in range(10):
    print("> Pinging handle.remote(base_number={})".format(i))
Example #6
import time

import requests

from ray import serve

# blocking=True will wait for the HTTP server to be ready to serve requests.
serve.init(blocking=True)

# an endpoint is associated with an http URL.
serve.create_endpoint("my_endpoint", "/echo")


# a backend can be a function or a class.
# it can be invoked from the web as well as from Python.
def echo_v1(flask_request, response="hello from python!"):
    if serve.context.web:
        response = flask_request.url
    return response


serve.create_backend(echo_v1, "echo:v1")
serve.set_traffic("my_endpoint", {"echo:v1": 1.0})

# wait for routing table to get populated
time.sleep(2)

# relative slo (10 ms deadline) can be specified via http
slo_ms = 10.0
# an absolute slo deadline (in ms) can also be specified via http
abs_slo_ms = 11.9
print("> [HTTP] Pinging http://127.0.0.1:8000/"
      "echo?relative_slo_ms={}".format(slo_ms))
print(
    requests.get("http://127.0.0.1:8000/"
                 "echo?relative_slo_ms={}".format(slo_ms)).json())
print("> [HTTP] Pinging http://127.0.0.1:8000/"
Example #7
    "chmod +x hey_linux_amd64"
])

ray.init(address=cluster.address, dashboard_host="0.0.0.0")
serve.init()


@serve.accept_batch
def echo(_):
    time.sleep(0.01)  # Sleep for 10ms
    ray.show_in_webui(str(serve.context.batch_size), key="Current batch size")
    return ["hi {}".format(i) for i in range(serve.context.batch_size)]


config = {"num_replicas": 30, "max_batch_size": 16}
serve.create_backend("echo:v1", echo, config=config)
serve.create_endpoint("echo", backend="echo:v1", route="/echo")

print("Warming up")
for _ in range(5):
    resp = requests.get("http://127.0.0.1:8000/echo").text
    print(resp)
    time.sleep(0.5)

connections = int(config["num_replicas"] * config["max_batch_size"] * 0.75)

while True:
    proc = subprocess.Popen([
        "./hey_linux_amd64", "-c",
        str(connections), "-z", "60m", "http://127.0.0.1:8000/echo"
    ],
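The Popen call is truncated above; a hedged completion (the keyword arguments are assumptions) that waits for one load-test run and prints hey's report:

proc = subprocess.Popen(
    ["./hey_linux_amd64", "-c", str(connections), "-z", "60m",
     "http://127.0.0.1:8000/echo"],
    stdout=subprocess.PIPE)
out, _ = proc.communicate()  # blocks until the timed run finishes
print(out.decode())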
Example #8
        input_tensor = torch.cat(
            [self.preprocessor(i).unsqueeze(0) for i in pil_images])
        print("[2/3] Images transformed, tensor shape {}".format(
            input_tensor.shape))

        with torch.no_grad():
            output_tensor = self.model(input_tensor)
        print("[3/3] Inference done!")
        return {"class_index": int(torch.argmax(output_tensor[0]))}


# __doc_define_servable_end__

ray.init(num_cpus=8)
# __doc_deploy_begin__
serve.start()
serve.create_backend("resnet18:v0", ImageModel)
serve.create_endpoint("predictor",
                      backend="resnet18:v0",
                      route="/image_predict",
                      methods=["POST"])
# __doc_deploy_end__

# __doc_query_begin__
ray_logo_bytes = requests.get(
    "https://github.com/ray-project/ray/raw/"
    "master/doc/source/images/ray_header_logo.png").content

resp = requests.post("http://localhost:8000/image_predict",
                     data=ray_logo_bytes)
print(resp.json())
# Output
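The expected output is elided above; a hedged shape check (the concrete class index depends on the model weights and the input image):

result = resp.json()
assert isinstance(result["class_index"], int)  # an ImageNet class id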
Example #9
class MagicCounter:
    def __init__(self, increment):
        self.increment = increment

    def __call__(self, flask_request, base_number=None):
        if serve.context.batch_size is not None:
            batch_size = serve.context.batch_size
            result = []
            for base_num in base_number:
                ret_str = "Number: {} Batch size: {}".format(
                    base_num, batch_size)
                result.append(ret_str)
            return result
        return ""


serve.init(blocking=True)
serve.create_endpoint("magic_counter", "/counter", blocking=True)
# specify max_batch_size in BackendConfig
b_config = BackendConfig(max_batch_size=5)
serve.create_backend(
    MagicCounter, "counter:v1", 42, backend_config=b_config)  # increment=42
print("Backend Config for backend: 'counter:v1'")
print(b_config)
serve.link("magic_counter", "counter:v1")

handle = serve.get_handle("magic_counter")
future_list = []

# fire 30 requests
for r in range(30):
    print("> [REMOTE] Pinging handle.remote(base_number={})".format(r))
    f = handle.remote(base_number=r)
    future_list.append(f)

# get results of queries as they complete
left_futures = future_list
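The snippet is cut off here; a hedged sketch of the usual completion loop (as the left_futures name suggests, draining pending calls with ray.wait):

while left_futures:
    # wait for at least one query to finish; keep the rest pending
    completed_futures, left_futures = ray.wait(left_futures, timeout=0.05)
    if completed_futures:
        result = ray.get(completed_futures[0])
        print("< " + result)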