Example #1
def get_policies_api(request):
    def _process(row, backend_list, backend_traffic):
        # agent_config and sim_config are stored as JSON strings.
        row[5] = json.loads(row[5])
        row[6] = json.loads(row[6])
        # Append replica/concurrency info when the policy's backend is live.
        if row[7] is None or row[7] not in backend_list:
            row += (None, None)
        else:
            row += (backend_list[row[7]].num_replicas,
                    backend_list[row[7]].max_concurrent_queries)
        # Append the endpoint currently routing traffic to this backend.
        row += (None, ) if row[7] is None else (backend_traffic.get(row[7]), )
        return tuple(row)

    sql = '''SELECT policy.cluster_id as trainer_id,
                    trainer_cluster.name as trainer_name,
                    policy.policy_id as policy_id, 
                    policy.model_name as model_name,
                    policy.checkpoint as checkpoint,
                    policy.agent_config,
                    policy.sim_config,
                    policy.backend_name
             FROM policy INNER JOIN trainer_cluster ON policy.cluster_id = trainer_cluster.id'''
    db = db_connect(BACKOFFICE_DB_NAME, check_same_thread=False)
    rows = select_all(db, sql)
    serve_client = serve.connect()
    global BACKOFFICE_ENDPOINTS
    # Map each backend to the endpoint currently routing traffic to it,
    # skipping the backoffice's own endpoints.
    backend_traffic = {
        backend_name: endpoint
        for endpoint, props in serve_client.list_endpoints().items()
        if endpoint not in BACKOFFICE_ENDPOINTS
        for backend_name in props['traffic']
    }
    # Fetch the backend list once instead of once per row.
    backend_list = serve_client.list_backends()
    data = [
        _process(list(row), backend_list, backend_traffic)
        for row in rows
    ]
    return json.dumps(data)
Example #2
def test_detached_deployment():
    # https://github.com/ray-project/ray/issues/11437

    cluster = Cluster()
    head_node = cluster.add_node(node_ip_address="127.0.0.1", num_cpus=6)

    # Create first job, check we can run a simple serve endpoint
    ray.init(head_node.address)
    first_job_id = ray.get_runtime_context().job_id
    client = serve.start(detached=True)
    client.create_backend("f", lambda _: "hello")
    client.create_endpoint("f", backend="f")
    assert ray.get(client.get_handle("f").remote()) == "hello"

    ray.shutdown()

    # Create the second job, make sure we can still create new backends.
    ray.init(head_node.address)
    assert ray.get_runtime_context().job_id != first_job_id

    client = serve.connect()
    client.create_backend("g", lambda _: "world")
    client.create_endpoint("g", backend="g")
    assert ray.get(client.get_handle("g").remote()) == "world"

    # Test passed, clean up.
    client.shutdown()
    ray.shutdown()
    cluster.shutdown()
Example #3
File: test_api.py Project: zaouk/ray
def test_connect(serve_instance):
    client = serve_instance

    # Check that you can have multiple clients to the same detached instance.
    client2 = serve.connect()
    assert client._controller_name == client2._controller_name

    # Check that you can have detached and non-detached instances.
    client3 = serve.start(http_port=8004)
    assert client3._controller_name != client._controller_name

    # Check that you can call serve.connect() from within a backend for both
    # detached and non-detached instances.

    def connect_in_backend(_):
        client = serve.connect()
        client.create_backend("backend-ception", connect_in_backend)
        return client._controller_name

    client.create_backend("connect_in_backend", connect_in_backend)
    client.create_endpoint("endpoint", backend="connect_in_backend")
    handle = client.get_handle("endpoint")
    assert ray.get(handle.remote()) == client._controller_name
    assert "backend-ception" in client.list_backends().keys()

    client3.create_backend("connect_in_backend", connect_in_backend)
    client3.create_endpoint("endpoint", backend="connect_in_backend")
    handle = client3.get_handle("endpoint")
    assert ray.get(handle.remote()) == client3._controller_name
    assert "backend-ception" in client3.list_backends().keys()
Example #4
    def __init__(self, port=None, tag=None):

        # log
        if tag is None:
            self.log = logging.getLogger(__name__)
        else:
            self.log = logging.getLogger("%s.%s" % (tag, RAY.TAG))

        ray.init(address="auto")
        nodes_info = ray.nodes()
        try:
            self.client = serve.start(
                http_options={
                    "location": "EveryNode",
                    "host": "0.0.0.0",
                    "port": port,
                    "middlewares": [
                        Middleware(CORSMiddleware,
                                   allow_origins=["*"],
                                   allow_methods=["*"])
                    ],
                },
                detached=True)
            self.log.info(
                "Ray serve initialized, node number: {} \n Nodes Info: {}".format(
                    len(nodes_info), nodes_info))

        except RayServeException:
            # Serve is already running on this cluster; connect to it instead.
            self.client = serve.connect()
            self.log.info(
                "Connected to existing Ray Serve, node number: {} \n Nodes Info: {}".format(
                    len(nodes_info), nodes_info))
Example #5
def test_ray_client(ray_client_instance):
    ray.util.connect(ray_client_instance)

    start = """
import ray
ray.util.connect("{}")

from ray import serve

serve.start(detached=True)
""".format(ray_client_instance)
    run_string_as_driver(start)

    serve.connect()

    deploy = """
import ray
ray.util.connect("{}")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""".format(ray_client_instance)
    run_string_as_driver(deploy)

    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    assert requests.get("http://localhost:8000/hello").text == "hello"

    delete = """
import ray
ray.util.connect("{}")

from ray import serve

serve.get_deployment("test1").delete()
""".format(ray_client_instance)
    run_string_as_driver(delete)

    assert "test1" not in serve.list_backends()
    assert "test1" not in serve.list_endpoints()
Example #6
def client_init(endpoints, update):
    if update:
        client = serve.connect()
        for endpoint in endpoints:
            delete_endpoint(client, endpoint)
        return client

    # `detached=True` starts a long-running Ray Serve instance that outlives
    # this driver; see also `ray_init()`.
    return serve.start(http_host="0.0.0.0", http_port=8000, detached=True)
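
A minimal usage sketch (hypothetical endpoint names; `delete_endpoint` and `ray_init` are assumed to be defined alongside `client_init`):

# First run: start a fresh detached Serve instance.
client = client_init(endpoints=["predict"], update=False)

# Subsequent runs: connect to the running instance and clear out the old
# endpoints before redeploying.
client = client_init(endpoints=["predict"], update=True)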
Example #7
def get_endpoints_api(request):
    backend = serve.connect()
    global BACKOFFICE_ENDPOINTS
    data = {
        k: v
        for k, v in backend.list_endpoints().items()
        if k not in BACKOFFICE_ENDPOINTS.keys()
    }
    return json.dumps(data)
Example #8
    def __init__(self):
        # Get handles to the two underlying models.
        client = serve.connect()
        self.color_handle = client.get_handle("color")
        self.plot_handle = client.get_handle("plot")

        # Store user click data in a detached actor.
        self.impressions = ImpressionStore.options(
            lifetime="detached", name="impressions").remote()
Example #9
 def __init__(self, max_deployments=1):
     self.client = serve.connect()
     self.max_deployments = max_deployments
     self.weighted_actions = [
         (self.create_deployment, 1),
         (self.verify_deployment, 4),
     ]
     self.deployments = []
     for _ in range(max_deployments):
         self.create_deployment()
Example #10
 def __init__(self, uri):
     super().__init__(uri)
     try:
         # TODO: support URI and redis password (ray-serve:/192.168....)?
         ray.init(address="auto")
     except ConnectionError:
         raise MlflowException("Could not find a running Ray instance.")
     try:
         self.client = serve.connect()
     except RayServeException:
         raise MlflowException(
             "Could not find a running Ray Serve instance on this Ray "
             "cluster.")
Example #11
    def run_loop(self):
        while True:
            if self.should_retrain():
                # Retrain the model.
                print("Retraining model...")
                new_data_df = ray.get(self.impressions.get_model_clicks.remote("plot"))
                new_model = retrain_sklearn_lr_model(new_data_df)

                # Deploy the new model (using incremental rollout).
                client = serve.connect()
                backend_name = f"plot:{int(time.time())}"
                client.create_backend(backend_name, PlotRecommender, new_model)
                client.set_traffic("plot", {"plot:v0": 0.9, backend_name: 0.1})
                print(f"Deployed new backend {backend_name}.")

            time.sleep(1)
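
A hedged sketch of how this incremental rollout might be completed once the new backend is validated (assumes the "plot:v0" backend is still serving the remaining 90% of traffic):

# Hypothetical follow-up: shift all traffic to the validated new backend
# and remove the old one.
client = serve.connect()
client.set_traffic("plot", {backend_name: 1.0})
client.delete_backend("plot:v0")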
Example #12
def test_scale_up(ray_cluster):
    cluster = ray_cluster
    cluster.add_node(num_cpus=1)
    cluster.connect()
    # By default, Serve controller and proxy actors use 0 CPUs,
    # so initially there should only be room for 1 replica.

    @serve.deployment("D", version="1", num_replicas=1)
    def D(*args):
        return os.getpid()

    def get_pids(expected, timeout=30):
        pids = set()
        start = time.time()
        while len(pids) < expected:
            pids.add(requests.get("http://localhost:8000/D").text)
            if time.time() - start >= timeout:
                raise TimeoutError("Timed out waiting for pids.")
        return pids

    serve.start(detached=True)
    client = serve.connect()

    D.deploy()
    pids1 = get_pids(1)

    goal_ref = D.options(num_replicas=3).deploy(_blocking=False)

    # Check that a new replica has not started in 1.0 seconds.  This
    # doesn't guarantee that a new replica won't ever be started, but
    # 1.0 seconds is a reasonable upper bound on replica startup time.
    assert not client._wait_for_goal(goal_ref, timeout=1.0)
    assert get_pids(1) == pids1

    # Add a node with another CPU, another replica should get placed.
    cluster.add_node(num_cpus=1)
    assert not client._wait_for_goal(goal_ref, timeout=1.0)
    pids2 = get_pids(2)
    assert pids1.issubset(pids2)

    # Add a node with another CPU, the final replica should get placed
    # and the deploy goal should be done.
    cluster.add_node(num_cpus=1)
    assert client._wait_for_goal(goal_ref)
    pids3 = get_pids(3)
    assert pids2.issubset(pids3)
Example #13
def start_backend_server(config=None):
    #stderrout = sys.stderr
    #sys.stderr = open('modelserver.log', 'w')
    if not ray.is_initialized():
        ray.init(include_dashboard=False, log_to_driver=False, logging_level=0, address='auto')

    try:
        backend_server = serve.connect()
    except RayServeException:
        backend_server = serve.start(detached=True)

    if config is not None:
        global _POLICY_ACTOR_CONFIG
        _POLICY_ACTOR_CONFIG = config

    #sys.stderr = stderrout
    #print("{} INFO Model Server started on {}".format(datetime.now(), addr))
    #print(
    #    "{} INFO Trainers Should Deploy Policies on this Server using address='{}'".format(datetime.now(), addr))
    return backend_server
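
A minimal usage sketch, assuming two policy replicas should share one CPU (the same multiplexing factor as in example #28):

# Hypothetical: connect-or-start once at process startup, overriding the
# policy actor resources.
backend_server = start_backend_server(config={'num_cpus': 0.5})
print(list(backend_server.list_backends().keys()))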
Example #14
def test_scale_up(ray_cluster):
    cluster = ray_cluster
    head_node = cluster.add_node(num_cpus=3)

    @serve.deployment("D", version="1", num_replicas=1)
    def D(*args):
        return os.getpid()

    def get_pids(expected, timeout=30):
        pids = set()
        start = time.time()
        while len(pids) < expected:
            pids.add(requests.get("http://localhost:8000/D").text)
            if time.time() - start >= timeout:
                raise TimeoutError("Timed out waiting for pids.")
        return pids

    ray.init(head_node.address)
    serve.start(detached=True)
    client = serve.connect()

    D.deploy()
    pids1 = get_pids(1)

    goal_ref = D.options(num_replicas=3).deploy(_blocking=False)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    assert get_pids(1) == pids1

    # Add a node with another CPU, another replica should get placed.
    cluster.add_node(num_cpus=1)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    pids2 = get_pids(2)
    assert pids1.issubset(pids2)

    # Add a node with another CPU, the final replica should get placed
    # and the deploy goal should be done.
    cluster.add_node(num_cpus=1)
    assert client._wait_for_goal(goal_ref)
    pids3 = get_pids(3)
    assert pids2.issubset(pids3)
Example #15
def serve_new_model(model_dir, checkpoint, config, metrics, day, gpu=False):
    print("Serving checkpoint: {}".format(checkpoint))

    checkpoint_path = _move_checkpoint_to_model_dir(model_dir, checkpoint,
                                                    config, metrics)

    try:
        # Try to connect to an existing Serve instance.
        client = serve.connect()
    except RayServeException:
        # First run: start a new long-lived, detached Serve instance.
        client = serve.start(detached=True)

    backend_name = "mnist:day_{}".format(day)

    client.create_backend(backend_name, MNISTBackend, checkpoint_path, config,
                          metrics, gpu)

    if "mnist" not in client.list_endpoints():
        # First time we serve a model - create endpoint
        client.create_endpoint("mnist",
                               backend=backend_name,
                               route="/mnist",
                               methods=["POST"])
    else:
        # The endpoint already exists, route all traffic to the new model
        # Here you could also implement an incremental rollout, where only
        # a part of the traffic is sent to the new backend and the
        # rest is sent to the existing backends.
        client.set_traffic("mnist", {backend_name: 1.0})

    # Delete previous existing backends
    for existing_backend in client.list_backends():
        if existing_backend.startswith("mnist:day") and \
           existing_backend != backend_name:
            client.delete_backend(existing_backend)

    return True
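
A hedged sketch of the incremental variant mentioned in the comment above (assumes `current_backend` names the backend currently serving "mnist"):

# Hypothetical: send 10% of traffic to the new backend and keep 90% on the
# current one, instead of cutting over fully.
client.set_traffic("mnist", {backend_name: 0.1, current_backend: 0.9})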
Example #16
 def check_handle_router_id():
     # Return the ID of the node hosting this handle's router actor.
     client = serve.connect()
     handle = client.get_handle("hi")
     return get_node_id_for_actor(handle.router_handle)
Example #17
File: scripts.py Project: tuyulers5/jav44
def shutdown():
    serve.connect().shutdown()
Example #18
        model_distribution = await self.impressions.model_distribution.remote(
            session_key, request.args["liked_id"])

        # Select which results to send to the user based on their clicks.
        distribution, impressions, chosen = choose_ensemble_results(
            model_distribution, results)

        # Record this click and these recommendations.
        await self.impressions.record_impressions.remote(
            session_key, impressions)

        return {
            "sessionKey": session_key,
            "dist": distribution,
            "ids": chosen,
            "sources": {
                i["id"]: source
                for source, impression in impressions.items()
                for i in impression
            }
        }


if __name__ == "__main__":
    ray.init(address="auto")
    client = serve.connect()
    client.create_backend("ensemble:v0", ComposedModel)
    client.create_endpoint("ensemble",
                           backend="ensemble:v0",
                           route="/rec/ensemble")
Example #19
File: test_handle.py Project: zzmcdc/ray
 def wrapper_model(web_request):
     handle = serve.connect().get_handle("echo")
     return ray.get(handle.remote(web_request))
Example #20
File: handle.py Project: zseymour/ray
 def __init__(self):
     client = serve.connect()
     self.handle = client.get_handle("hello_world")
Example #21
 def __init__(self):
     client = serve.connect()
     self.handle = client.get_handle("endpoint1")
Example #22
 def do_blocking_delete():
     client = serve.connect()
     client.delete_endpoint("wait")
     client.delete_backend("wait")
Example #23
 def __init__(self):
     client = serve.connect()
     self.model = client.get_handle("sum_model")
Example #24
def test_ray_client(ray_client_instance):
    ray.util.connect(ray_client_instance, namespace="")

    start = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve

serve.start(detached=True)
""".format(ray_client_instance)
    run_string_as_driver(start)

    serve.connect()

    deploy = """
import ray
ray.util.connect("{}", namespace="")

from ray import serve

@serve.deployment(name="test1", route_prefix="/hello")
def f(*args):
    return "hello"

f.deploy()
""".format(ray_client_instance)
    run_string_as_driver(deploy)

    assert "test1" in serve.list_backends()
    assert "test1" in serve.list_endpoints()
    assert requests.get("http://*****:*****@app.get("/")
def hello():
    return "hello"

@serve.deployment
@serve.ingress(app)
class A:
    pass

A.deploy()
""".format(ray_client_instance)
    run_string_as_driver(fastapi)

    assert requests.get("http://localhost:8000/A").json() == "hello"
Example #25
 def __init__(self, models):
     # sync=False returns async handles so the underlying models can be
     # queried concurrently.
     client = serve.connect()
     self.model_handles = [
         client.get_handle(model, sync=False) for model in models
     ]
Example #26
 def __init__(self):
     client = serve.connect()
     self.model_one = client.get_handle("model_one")
     self.model_two = client.get_handle("model_two")
Example #27
File: test_api.py Project: zaouk/ray
 def connect_in_backend(_):
     client = serve.connect()
     client.create_backend("backend-ception", connect_in_backend)
     return client._controller_name
Example #28
    'get_trainers': (get_trainers_api, '/trainers'),
    'get_endpoints': (get_endpoints_api, '/endpoints'),
    'get_policies': (get_policies_api, '/policies'),
}

# Policies CPU Multiplexing factor. 0.5 = 2 policies / CPU
POLICY_ACTOR_CONFIG = {'num_cpus': 0.5}

# Start Backend

if __name__ == "__main__":
    if not ray.is_initialized():
        ray.init(address='auto')

    try:
        backend_server = serve.connect()
    except RayServeException:
        backend_server = serve.start(detached=True)

    endpoint_list = list(backend_server.list_endpoints().keys())
    backend_list = list(backend_server.list_backends().keys())

    backoffice_actor_config = {'num_cpus': 1 / len(BACKOFFICE_ENDPOINTS)}

    # Policy replicas
    policy_config = {'num_replicas': 1}

    for name, (service_function, route) in BACKOFFICE_ENDPOINTS.items():
        if name in endpoint_list:
            backend_server.delete_endpoint(name)
        if name in backend_list:
Example #29
 def __init__(self, kill_period_s=1):
     self.client = serve.connect()
     self.kill_period_s = kill_period_s
Example #30
 def __init__(self):
     client = serve.connect()
     self.handle = client.get_handle("backend")