Example #1
import ray
from ray import serve
from transformers import pipeline


async def startup_event():
    ray.init(address="auto")  # Connect to the running Ray cluster.
    client = serve.start()  # Start the Ray Serve instance.

    # Define a callable class to use for our Ray Serve backend.
    class GPT2:
        def __init__(self):
            self.nlp_model = pipeline('text-generation', model='gpt2')

        def __call__(self, request):
            return self.nlp_model(request._data, max_length=50)

    # Set up a Ray Serve backend with the desired number of replicas.
    backend_config = serve.BackendConfig(num_replicas=10)
    client.create_backend("gpt-2", GPT2, config=backend_config)
    client.create_endpoint("generate", backend="gpt-2")

    # Get a handle to the endpoint so it can be queried from Python.
    global serve_handle
    serve_handle = client.get_handle("generate")
Example #2
async def startup_event():
    ray.init(address="auto")  # Connect to the running Ray cluster.
    serve.start(http_host=None)  # Start the Ray Serve instance.

    # Define a callable class to use for our Ray Serve backend.
    class GPT2:
        def __init__(self):
            self.nlp_model = pipeline("text-generation", model="gpt2")

        async def __call__(self, request):
            return self.nlp_model(await request.body(), max_length=50)

    # Set up a Ray Serve backend with the desired number of replicas.
    backend_config = serve.BackendConfig(num_replicas=2)
    serve.create_backend("gpt-2", GPT2, config=backend_config)
    serve.create_endpoint("generate", backend="gpt-2")

    # Get a handle to our Ray Serve endpoint so we can query it in Python.
    global serve_handle
    serve_handle = serve.get_handle("generate")
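
The handle stored in serve_handle can then be called from an ordinary web route. As a minimal sketch (assuming the snippet above lives in a FastAPI app with an `app` object and a hypothetical /generate route), an incoming request could be forwarded to the Ray Serve endpoint like this:

@app.get("/generate")
def generate(query: str):
    # Forward the query to the Ray Serve endpoint and wait for the result.
    return ray.get(serve_handle.remote(query))
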
Example #3
# File name: deploy_serve.py
import ray
from ray import serve

# Connect to the running Ray cluster.
ray.init(address="auto")

# Start a detached Ray Serve instance.  It will persist after the script exits.
client = serve.start(http_host=None, detached=True)


# Define a function to serve. Alternatively, you could define a stateful class.
async def my_model(request):
    data = await request.body()
    return f"Model received data: {data}"


# Set up a backend with the desired number of replicas and set up an endpoint.
backend_config = serve.BackendConfig(num_replicas=2)
client.create_backend("my_backend", my_model, config=backend_config)
client.create_endpoint("my_endpoint", backend="my_backend")
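
Because the Serve instance above is started with detached=True, it keeps running after deploy_serve.py exits, so the endpoint can be queried later from a separate script. A minimal sketch, assuming a Ray Serve version from the same era as the client API used above (where serve.connect() is available):

# File name: query_serve.py (hypothetical companion script)
import ray
from ray import serve

ray.init(address="auto")   # Connect to the same running Ray cluster.
client = serve.connect()   # Attach to the existing detached Serve instance.

handle = client.get_handle("my_endpoint")
result = ray.get(handle.remote("hello"))
print(result)  # Expected to print something like: Model received data: hello
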
Example #4
# Note: `client` (a Ray Serve client), `ImportedBackend`, `json`, and `serve`
# come from imports and definitions earlier in the original file (not shown).
def main(config_file):
    with open(config_file) as f:
        config = json.load(f)

    # Create a backend and endpoint for each model listed in the config file.
    names = []
    for model in config["models"]:
        client.create_backend(
            model["name"],
            ImportedBackend(model["class"]),
            *model.get("args", []),
            config=serve.BackendConfig(**model.get("config", {})))
        client.create_endpoint(model["name"], backend=model["name"])
        names.append(model["name"])

    # Chain the individual models behind a single pipeline endpoint.
    client.create_backend(
        config["name"], ImportedBackend("serve_pipeline.ModelPipeline"), names)
    client.create_endpoint(
        config["name"], backend=config["name"], route=config.get("route", None))
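
For illustration only, the parsed config dict that main() expects might look roughly like the following; the model names and class paths are hypothetical, and only the keys are implied by the code above:

example_config = {
    "name": "pipeline",                    # name of the combined pipeline endpoint
    "route": "/pipeline",                  # optional HTTP route for the endpoint
    "models": [
        {
            "name": "model_a",             # hypothetical model name
            "class": "my_package.ModelA",  # hypothetical import path for the backend class
            "args": ["weights_a.pkl"],     # optional positional args for the backend
            "config": {"num_replicas": 2}, # optional serve.BackendConfig kwargs
        },
        {
            "name": "model_b",
            "class": "my_package.ModelB",
        },
    ],
}
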
                                            "--batch_size")
else:
    args["--batch_size"] = 1

ray.init(address=args["--ray_address"], redis_password=args["--ray_password"])
serve.init(start_server=False)

input_p = Path(args["--input_directory"])
output_p = Path(args["--output_directory"])

all_wavs = list(input_p.rglob("**/*.WAV"))

# model = RunSplitter()
# predictions = model(None, audio_paths=all_wavs[0:10])
# print(predictions)

serve.create_endpoint("splitter")
serve.create_backend(
    RunSplitter,
    "splitter:v0",
    backend_config=serve.BackendConfig(num_replicas=args["--num_nodes"],
                                       max_batch_size=args["--batch_size"]),
)
serve.link("splitter", "splitter:v0")

handle = serve.get_handle("splitter")

ids = [handle.remote(audio_paths=audio_path) for audio_path in all_wavs]
results = ray.get(ids)
print(results)
Example #6
    "chmod +x hey_linux_amd64"
])

ray.init(address=cluster.address, include_webui=True, webui_host="0.0.0.0")
serve.init(blocking=True, kv_store_connector=lambda ns: RayInternalKVStore(ns))


@serve.accept_batch
def echo(_):
    time.sleep(0.01)  # Sleep for 10ms
    ray.show_in_webui(str(serve.context.batch_size), key="Current batch size")
    return ["hi {}".format(i) for i in range(serve.context.batch_size)]


serve.create_endpoint("echo", "/echo")
config = serve.BackendConfig(num_replicas=30, max_batch_size=16)
serve.create_backend(echo, "echo:v1", backend_config=config)
serve.set_traffic("echo", {"echo:v1": 1})

print("Warming up")
for _ in range(5):
    resp = requests.get("http://127.0.0.1:8000/echo").text
    print(resp)
    time.sleep(0.5)

connections = int(config.num_replicas * config.max_batch_size * 0.75)

while True:
    proc = subprocess.Popen([
        "./hey_linux_amd64", "-c",
        str(connections), "-z", "60m", "http://127.0.0.1:8000/echo"