Esempio n. 1
0
def test_deployment_properties():
    """Check that options given to ``serve.deployment`` can be read back.

    The decorator is applied to a trivial class with every option set, and
    each option is then compared against the matching attribute of the
    resulting deployment. A second deployment verifies that ``version`` and
    ``route_prefix`` may be explicitly set to ``None``.
    """

    class DClass:
        pass

    decorate = serve.deployment(
        name="name",
        init_args=("hello", 123),
        version="version",
        num_replicas=2,
        user_config="hi",
        max_concurrent_queries=100,
        route_prefix="/hello",
        ray_actor_options={"num_cpus": 2},
    )
    D = decorate(DClass)

    # (attribute name, value expected on the deployment), in declaration order.
    expectations = [
        ("name", "name"),
        ("init_args", ("hello", 123)),
        ("version", "version"),
        ("num_replicas", 2),
        ("user_config", "hi"),
        ("max_concurrent_queries", 100),
        ("route_prefix", "/hello"),
        ("ray_actor_options", {"num_cpus": 2}),
    ]
    for attribute, wanted in expectations:
        assert getattr(D, attribute) == wanted

    # Explicit ``None`` values must be preserved as ``None``.
    D = serve.deployment(version=None, route_prefix=None)(DClass)
    for attribute in ("version", "route_prefix"):
        assert getattr(D, attribute) is None
Esempio n. 2
0
    def test_import_path_deployment(self, serve_instance):
        """Deploy three targets named by import-path strings and check replies.

        All deployments share one ``ray_actor_options`` dict whose runtime
        env lists two remote zip archives as ``py_modules``.
        """
        py_modules = [
            "https://github.com/shrekris-anyscale/test_deploy_group/archive/HEAD.zip",
            "https://github.com/shrekris-anyscale/test_module/archive/HEAD.zip",
        ]
        actor_opts = {"runtime_env": {"py_modules": py_modules}}

        # (deployment name, import path), in the order responses are checked.
        targets = [
            ("shallow", "test_env.shallow_import.ShallowClass"),
            ("deep", "test_env.subdir1.subdir2.deep_import.DeepClass"),
            ("one", "test_module.test.one"),
        ]
        deployments = [
            serve.deployment(name=dep_name, ray_actor_options=actor_opts)(path)
            for dep_name, path in targets
        ]
        expected = ["Hello shallow world!", "Hello deep world!", 2]

        self.deploy_and_check_responses(deployments, expected)
Esempio n. 3
0
def test_simple_adder(serve_instance):
    """Deploy ``ModelWrapper`` around the adder predictor and send one request.

    The checkpoint is built with an increment of 2, and the reply for the
    input ``[40]`` is expected to carry ``[42]`` with a batch size of 1.
    """
    adder = serve.deployment(name="Adder")(ModelWrapper)
    adder.deploy(
        predictor_cls=AdderPredictor,
        checkpoint=AdderCheckpoint.from_dict({"increment": 2}),
    )

    pending = send_request.remote(json={"array": [40]})
    reply = ray.get(pending)
    assert reply == {"value": [42], "batch_size": 1}
Esempio n. 4
0
def test_batching(serve_instance):
    """Deploy the adder with batching enabled and check two requests are batched.

    Batching is configured with a maximum batch size of 2 and a wait timeout
    of 1000 seconds; both replies are expected to report a batch size of 2.
    """
    batching = {"max_batch_size": 2, "batch_wait_timeout_s": 1000}
    adder = serve.deployment(name="Adder")(ModelWrapper)
    adder.deploy(
        predictor_cls=AdderPredictor,
        checkpoint=AdderCheckpoint.from_dict({"increment": 2}),
        batching_params=batching,
    )

    # Issue both requests before waiting on either one.
    pending = [send_request.remote(json={"array": [40]}) for _ in range(2)]
    replies = ray.get(pending)
    for reply in replies:
        assert reply == {"value": [42], "batch_size": 2}
Esempio n. 5
0
    def test_get_import_path_nested_actor(self):
        """Check the import path computed for a deployment wrapping ``DecoratedActor``."""
        deployment = serve.deployment(name="actor")(DecoratedActor)
        import_path = get_deployment_import_path(deployment)

        # CI may change the parent path, so only the suffix is compared.
        expected_suffix = "ray.serve.tests.test_util.DecoratedActor"
        assert import_path.endswith(expected_suffix)
Esempio n. 6
0
    def test_import_path_deployment_decorated(self, serve_instance):
        """Deploy already-decorated targets by import path and check overrides.

        After deployment, neither deployment is expected to report a
        ``max_concurrent_queries`` of 17.
        """
        # (deployment name, import path), in the order responses are checked.
        targets = [
            ("decorated_func", "ray.serve.tests.test_application.decorated_func"),
            ("decorated_clss", "ray.serve.tests.test_application.DecoratedClass"),
        ]
        deployments = [
            serve.deployment(name=dep_name)(path) for dep_name, path in targets
        ]
        expected = ["got decorated func", "got decorated class"]

        self.deploy_and_check_responses(deployments, expected)

        # Check that non-default decorated values were overwritten.
        for dep_name, _ in targets:
            assert serve.get_deployment(dep_name).max_concurrent_queries != 17
Esempio n. 7
0
def test_class_factory(serve_instance):
    """Run a DAG whose deployment class comes from ``class_factory()``.

    The class is bound with the argument 3; the test expects 3 back through
    the handle and the text "3" over HTTP.
    """
    with InputNode() as _:
        factory_node = serve.deployment(class_factory()).bind(3)
        get_node = factory_node.get.bind()
        dag = NoargDriver.bind(get_node)

    handle = serve.run(dag)

    handle_result = ray.get(handle.remote())
    assert handle_result == 3

    http_body = requests.get("http://127.0.0.1:8000/").text
    assert http_body == "3"
Esempio n. 8
0
def make_ingress_deployment(
    name: str, serve_dag_root_json: str, input_schema_path: str
):
    """Return a ``ModelWrapper`` deployment configured as a pipeline ingress.

    Args:
        name: Name given to the returned deployment.
        serve_dag_root_json: Stored under the checkpoint's ``"uri"`` key.
        input_schema_path: Stored under the ``"input_schema"`` init kwarg.

    Returns:
        The result of ``serve.deployment(ModelWrapper).options(...)`` with
        batching disabled (``"batching_params": False``).
    """
    wrapper = serve.deployment(ModelWrapper)

    predictor_path = _get_import_path(PipelineIngressModel)
    checkpoint_spec = {
        "checkpoint_cls": _get_import_path(PipelineIngressCheckpoint),
        "uri": serve_dag_root_json,
    }
    init_kwargs = {
        "predictor_cls": predictor_path,
        "checkpoint": checkpoint_spec,
        "input_schema": input_schema_path,
        "batching_params": False,
    }
    return wrapper.options(name=name, init_kwargs=init_kwargs)
Esempio n. 9
0
def test_serve_pipeline_class_factory_plot():
    """Plot a serve DAG built from a class factory and check its dot edges.

    The DAG is rendered to a PNG in a temporary directory, then converted to
    a dot graph whose string form must contain the two expected edges.
    """
    with InputNode() as _:
        factory_node = serve.deployment(class_factory()).bind(3)
        get_node = factory_node.get.bind()
        serve_dag = ray_dag_to_serve_dag(NoargDriver.bind(get_node))

    with tempfile.TemporaryDirectory() as workdir:
        image_path = os.path.join(workdir, "tmp.png")
        ray.experimental.dag.plot(serve_dag, image_path)
        # The plot call is expected to have written the image file.
        assert os.path.isfile(image_path)

    dot_text = ray.experimental.dag.vis_utils.dag_to_dot(serve_dag).to_string()
    for edge in ("MyInlineClass -> get", "get -> NoargDriver"):
        assert edge in dot_text
Esempio n. 10
0
def create_deployment(
    address: str,
    namespace: str,
    runtime_env_json: str,
    deployment: str,
    options_json: str,
):
    """Connect to Ray, import a deployment target and deploy it with options.

    Args:
        address: Passed to ``ray.init`` as ``address``.
        namespace: Passed to ``ray.init`` as ``namespace``.
        runtime_env_json: JSON text decoded and passed as ``runtime_env``.
        deployment: Import path resolved with ``import_attr``.
        options_json: JSON text decoded into keyword arguments for
            ``.options(...)``.
    """
    runtime_env = json.loads(runtime_env_json)
    ray.init(address=address, namespace=namespace, runtime_env=runtime_env)

    target = import_attr(deployment)
    # Anything that is not already a Deployment gets wrapped into one.
    if isinstance(target, Deployment):
        deployment_obj = target
    else:
        deployment_obj = serve.deployment(target)

    overrides = json.loads(options_json)
    deployment_obj.options(**overrides).deploy()
def init(
    backend,
    num_replicas: int,
    max_concurrent_queries: int,
    resources,
    counter,
    conda_env,
):
    """Import a backend class from its dotted path and deploy it.

    Args:
        backend: Dotted path ``"<module path>.<ClassName>"``. The route is
            derived from its components, dropping the first one and the
            last two, joined with "/" and lower-cased.
        num_replicas: Forwarded to the deployment options.
        max_concurrent_queries: Forwarded to the deployment options and also
            passed as the third positional argument of ``deploy``.
        resources: Mapping; its optional "cpu" and "gpu" entries become the
            ``num_cpus`` / ``num_gpus`` actor options.
        counter: Passed as the first positional argument of ``deploy``.
        conda_env: When truthy, wrapped in ``ray.serve.CondaEnv``.
    """
    route = "/".join(backend.split(".")[1:-2]).lower()

    module_name, class_name = backend.rsplit(".", 1)
    backend_class = getattr(importlib.import_module(module_name), class_name)

    # Translate the resource keys that are present into actor option names.
    ray_actor_options = {
        option: resources[key]
        for key, option in (("cpu", "num_cpus"), ("gpu", "num_gpus"))
        if key in resources
    }

    if conda_env:
        # NOTE(review): the CondaEnv object is built but not used afterwards
        # in this function - confirm whether it should reach the deployment.
        conda_env = ray.serve.CondaEnv(conda_env)

    configured = serve.deployment(backend_class).options(
        name=route,
        route_prefix="/" + route,
        ray_actor_options=ray_actor_options,
        num_replicas=num_replicas,
        max_concurrent_queries=max_concurrent_queries,
    )
    configured.deploy(counter, route, max_concurrent_queries)
Esempio n. 12
0
def create_deployment(deployment: str, options_json: str):
    """Import a deployment target and deploy it with JSON-encoded options.

    Args:
        deployment: Import path resolved with ``import_attr``.
        options_json: JSON text decoded into keyword arguments for
            ``.options(...)``.
    """
    target = import_attr(deployment)
    # Anything that is not already a Deployment gets wrapped into one.
    if isinstance(target, Deployment):
        deployment_obj = target
    else:
        deployment_obj = serve.deployment(target)

    overrides = json.loads(options_json)
    deployment_obj.options(**overrides).deploy()
Esempio n. 13
0
def run(
    config_or_import_path: str,
    args_and_kwargs: Tuple[str],
    runtime_env: str,
    runtime_env_json: str,
    working_dir: str,
    address: str,
):
    """Build an application from a config file or an import path and run it.

    Args:
        config_or_import_path: Path of an existing config file, or otherwise
            a dotted import path to a class or function.
        args_and_kwargs: Raw init arguments, split by
            ``_process_args_and_kwargs``; only allowed with an import path.
        runtime_env: Forwarded to ``parse_runtime_env_args``.
        runtime_env_json: Forwarded to ``parse_runtime_env_args``.
        working_dir: Forwarded to ``parse_runtime_env_args``.
        address: Passed to ``ray.init`` as ``address``.

    Raises:
        ValueError: If init arguments are given together with a config file,
            or if the import path contains no ".".
    """
    # An existing file means "config"; anything else is an import path.
    is_config = pathlib.Path(config_or_import_path).is_file()
    args, kwargs = _process_args_and_kwargs(args_and_kwargs)

    # Runtime env updates requested on the command line for the deployments.
    runtime_env_updates = parse_runtime_env_args(
        runtime_env=runtime_env,
        runtime_env_json=runtime_env_json,
        working_dir=working_dir,
    )

    # "working_dir" moves out of the per-deployment updates into the
    # runtime env handed to ray.init().
    ray_runtime_env = {}
    if "working_dir" in runtime_env_updates:
        ray_runtime_env["working_dir"] = runtime_env_updates.pop("working_dir")

    if not is_config:
        import_path = config_or_import_path
        if "." not in import_path:
            raise ValueError(
                "Import paths must be of the form "
                '"module.submodule_1...submodule_n.MyClassOrFunction".')

        cli_logger.print(
            f'Deploying function or class imported from "{import_path}".')

        # The deployment is named after the last dotted component.
        deployment_name = import_path.rsplit(".", 1)[1]
        imported = serve.deployment(name=deployment_name)(import_path)

        app = Application(
            [imported.options(init_args=args, init_kwargs=kwargs)])
    else:
        config_path = config_or_import_path
        # Validate inputs before Ray is initialized so errors surface quickly.
        num_init_values = len(args) + len(kwargs)
        if num_init_values > 0:
            raise ValueError(
                "ARGS_AND_KWARGS cannot be defined for a "
                "config file deployment. Please specify the "
                "init_args and init_kwargs inside the config file.")

        cli_logger.print("Deploying application in config file at "
                         f"{config_path}.")
        with open(config_path, "r") as config_file:
            app = Application.from_yaml(config_file)

    ray.init(address=address, namespace="serve", runtime_env=ray_runtime_env)

    for deployment in app:
        _configure_runtime_env(deployment, runtime_env_updates)

    app.run(logger=cli_logger)
Esempio n. 14
0
def test_redeploy_multiple_replicas(serve_instance, use_handle):
    """Check that redeploying a multi-replica deployment is a rolling update.

    Version "1" is deployed with two replicas and one of them is blocked on
    a signal. Version "2" is then deployed without blocking; the deploy goal
    must not complete while the blocked request is outstanding, and must
    complete with two version "2" replicas once the signal is sent.

    Args:
        serve_instance: Serve client, used to wait on the deploy goal.
        use_handle: When true, requests go through a Serve handle; otherwise
            they go over HTTP to ``http://localhost:8000/<name>``.
    """
    client = serve_instance

    name = "test"

    @ray.remote(num_cpus=0)
    def call(block=False):
        # Replies have the form "<version>|<pid>"; return the two parts.
        if use_handle:
            handle = serve.get_handle(name, missing_ok=True)
            ret = ray.get(handle.remote(block=str(block)))
        else:
            ret = requests.get(
                f"http://localhost:8000/{name}", params={
                    "block": block
                }).text

        return ret.split("|")[0], ret.split("|")[1]

    signal_name = f"signal-{get_random_letters()}"
    signal = SignalActor.options(name=signal_name).remote()

    async def v1(request):
        # A request with block == "True" waits until the signal is sent.
        if request.query_params["block"] == "True":
            signal = ray.get_actor(signal_name)
            await signal.wait.remote()
        return f"1|{os.getpid()}"

    def v2(*args):
        return f"2|{os.getpid()}"

    def make_nonblocking_calls(expected, expect_blocking=False):
        # Sends batches of 10 non-blocking calls for up to 30 seconds until
        # each version in `expected` has answered from exactly the given
        # number of distinct pids (and, if requested, some call is pending).
        # Returns (dict[val, set(pid)], list of refs that were not ready).
        blocking = []
        responses = defaultdict(set)
        start = time.time()
        while time.time() - start < 30:
            refs = [call.remote(block=False) for _ in range(10)]
            ready, not_ready = ray.wait(refs, timeout=0.5)
            for ref in ready:
                val, pid = ray.get(ref)
                responses[val].add(pid)
            # Record each still-pending ref exactly once; extending inside a
            # loop over not_ready would append the whole list repeatedly.
            blocking.extend(not_ready)

            if (all(
                    len(responses[val]) == num
                    for val, num in expected.items())
                    and (expect_blocking is False or len(blocking) > 0)):
                break
        else:
            assert False, f"Timed out, responses: {responses}."

        return responses, blocking

    v1 = serve.deployment(name=name, version="1", num_replicas=2)(v1)
    v1.deploy()
    responses1, _ = make_nonblocking_calls({"1": 2})
    pids1 = responses1["1"]

    # ref2 will block a single replica until the signal is sent. Check that
    # some requests are now blocking.
    ref2 = call.remote(block=True)
    responses2, blocking2 = make_nonblocking_calls(
        {
            "1": 1
        }, expect_blocking=True)
    assert list(responses2["1"])[0] in pids1

    # Redeploy new version. Since there is one replica blocking, only one new
    # replica should be started up.
    v2 = v1.options(backend_def=v2, version="2")
    goal_ref = v2.deploy(_blocking=False)
    assert not client._wait_for_goal(goal_ref, timeout=0.1)
    responses3, blocking3 = make_nonblocking_calls(
        {
            "1": 1
        }, expect_blocking=True)

    # Signal the original call to exit.
    ray.get(signal.send.remote())
    val, pid = ray.get(ref2)
    assert val == "1"
    assert pid in responses1["1"]

    # Now the goal and requests to the new version should complete.
    # We should have two running replicas of the new version.
    assert client._wait_for_goal(goal_ref)
    make_nonblocking_calls({"2": 2})