Beispiel #1
0
def test_autoscaling_0_replica(serve_instance):
    """Run a single-node DAG whose deployments allow zero replicas.

    Both ``Model`` and the ``DAGDriver`` are deployed with an autoscaling
    config whose ``min_replicas`` is 0.  The test passes when a request
    with input 1 sent through the DAG handle comes back as 2.
    """
    scale_to_zero_config = dict(
        metrics_interval_s=0.1,
        min_replicas=0,
        max_replicas=2,
        look_back_period_s=0.4,
        downscale_delay_s=0,
        upscale_delay_s=0,
    )

    @serve.deployment(autoscaling_config=scale_to_zero_config)
    class Model:
        """Adds a fixed weight to whatever input it receives."""

        def __init__(self, weight):
            self.weight = weight

        def forward(self, input):
            return input + self.weight

    with InputNode() as dag_input:
        adder = Model.bind(1)
        driver = DAGDriver.options(
            route_prefix="/my-dag",
            autoscaling_config=scale_to_zero_config,
        )
        graph = driver.bind(adder.forward.bind(dag_input))

    handle = serve.run(graph)
    # Model was bound with weight 1, so an input of 1 must yield 2.
    result = ray.get(handle.predict.remote(1))
    assert result == 2
Beispiel #2
0
def test_deploy_nullify_route_prefix(serve_instance, prefixes):
    """Redeploy the same DAG under each prefix and check HTTP reachability.

    For every entry in ``prefixes`` the DAG is deployed again with that
    ``route_prefix``.  When the prefix is ``None`` a GET on ``/f`` must
    answer 404; otherwise the body must be the JSON string ``"got me"``.
    The Python handle must return ``"got me"`` in both cases.
    """
    endpoint = "http://localhost:8000/f"

    @serve.deployment
    def f(*args):
        return "got me"

    for route_prefix in prefixes:
        driver = DAGDriver.options(route_prefix=route_prefix)
        handle = serve.run(driver.bind(f.bind()))

        response = requests.get(endpoint)
        if route_prefix is not None:
            assert response.text == '"got me"'
        else:
            assert response.status_code == 404

        # The handle path is checked for every prefix, including None.
        assert ray.get(handle.predict.remote()) == "got me"
Beispiel #3
0
def test_autoscaling_with_chain_nodes(min_replicas, serve_instance):
    """Check up- and downscaling of a two-model chain behind a DAGDriver.

    ``Model1`` blocks on a ``SignalActor`` inside ``forward`` and feeds
    ``Model2``.  All three deployments share one autoscaling config whose
    lower bound is ``min_replicas``.  The test sends requests, waits for the
    replica counts to grow, releases the signal, and then waits for every
    deployment to report exactly ``min_replicas`` running replicas.
    """
    signal = SignalActor.remote()

    autoscaling_config = dict(
        metrics_interval_s=0.1,
        min_replicas=min_replicas,
        max_replicas=2,
        look_back_period_s=0.4,
        downscale_delay_s=30,
        upscale_delay_s=0,
    )
    shared_options = dict(
        autoscaling_config=autoscaling_config,
        _graceful_shutdown_timeout_s=1,
    )

    @serve.deployment(**shared_options)
    class Model1:
        def __init__(self, weight):
            self.weight = weight

        def forward(self, input):
            # Hold the request until the test sends the signal.
            ray.get(signal.wait.remote())
            return input + self.weight

    @serve.deployment(**shared_options)
    class Model2:
        def __init__(self, weight):
            self.weight = weight

        def forward(self, input):
            return input + self.weight

    with InputNode() as user_input:
        first_stage = Model1.bind(0)
        second_stage = Model2.bind(1)
        first_output = first_stage.forward.bind(user_input)
        chained_output = second_stage.forward.bind(first_output)
        serve_dag = DAGDriver.options(
            route_prefix="/my-dag",
            **shared_options,
        ).bind(chained_output)

    dag_handle = serve.run(serve_dag)
    controller = serve_instance._controller

    def running(deployment_name):
        return get_num_running_replicas(controller, deployment_name)

    def send_requests(count):
        # Fire-and-forget: the returned references are not awaited.
        for _ in range(count):
            dag_handle.predict.remote(0)

    # upscaling
    send_requests(10)
    wait_for_condition(lambda: running(DAGDriver.name) >= 1)
    send_requests(10)
    wait_for_condition(lambda: running(DAGDriver.name) >= 2)
    wait_for_condition(lambda: running(Model1.name) >= 1, timeout=40)
    wait_for_condition(lambda: running(Model1.name) >= 2, timeout=40)
    signal.send.remote()
    wait_for_condition(lambda: running(Model2.name) >= 1, timeout=40)

    # downscaling
    for deployment in (DAGDriver, Model1, Model2):
        wait_for_condition(
            lambda d=deployment: running(d.name) == min_replicas,
            timeout=60,
        )
def test_autoscaling_with_ensemble_nodes(serve_instance):
    """Check autoscaling of a DAG where two model nodes fan in to ``combine``.

    Both ``Model`` nodes, ``combine`` and the ``DAGDriver`` use the same
    autoscaling config with ``min_replicas`` of 0.  The test asserts that the
    model and combine deployments report zero running replicas right after
    ``serve.run``, sends requests and waits for the replica counts to grow,
    then releases the signal that ``combine`` blocks on and waits for every
    deployment to report zero running replicas again.
    """
    signal = SignalActor.remote()
    autoscaling_config = {
        "metrics_interval_s": 0.1,
        "min_replicas": 0,
        "max_replicas": 2,
        "look_back_period_s": 0.4,
        "downscale_delay_s": 30,
        "upscale_delay_s": 0,
    }

    # ``autoscaling_config`` (no leading underscore) is the keyword used by
    # the other autoscaling tests in this file.
    @serve.deployment(
        autoscaling_config=autoscaling_config,
        _graceful_shutdown_timeout_s=1,
    )
    class Model:
        def __init__(self, weight):
            self.weight = weight

        def forward(self, input):
            return input + self.weight

    @serve.deployment(
        autoscaling_config=autoscaling_config,
        _graceful_shutdown_timeout_s=1,
    )
    def combine(value_refs):
        # Hold the request until the test sends the signal.
        ray.get(signal.wait.remote())
        return sum(ray.get(value_refs))

    with InputNode() as user_input:
        model1 = Model.bind(0)
        model2 = Model.bind(1)
        output1 = model1.forward.bind(user_input)
        output2 = model2.forward.bind(user_input)
        output = combine.bind([output1, output2])
        serve_dag = DAGDriver.options(
            route_prefix="/my-dag",
            autoscaling_config=autoscaling_config,
            _graceful_shutdown_timeout_s=1,
        ).bind(output)

    dag_handle = serve.run(serve_dag)
    controller = serve_instance._controller

    # NOTE(review): presumably the two bound ``Model`` nodes are deployed
    # under the names "Model" and "Model_1" -- confirm against the DAG
    # builder's naming scheme.
    assert get_num_running_replicas(controller, "Model") == 0
    assert get_num_running_replicas(controller, "Model_1") == 0
    assert get_num_running_replicas(controller, "combine") == 0

    # upscaling
    # Fire-and-forget: a plain loop, since the references are not used.
    for _ in range(10):
        dag_handle.predict.remote(0)
    wait_for_condition(
        lambda: get_num_running_replicas(controller, DAGDriver.name) >= 1)

    wait_for_condition(
        lambda: get_num_running_replicas(controller, "Model") >= 1,
        timeout=40)
    wait_for_condition(
        lambda: get_num_running_replicas(controller, "Model_1") >= 1,
        timeout=40)
    wait_for_condition(
        lambda: get_num_running_replicas(controller, "combine") >= 2,
        timeout=40)
    signal.send.remote()

    # downscaling
    for name in (DAGDriver.name, "Model", "Model_1", "combine"):
        wait_for_condition(
            # Bind ``name`` as a default so each wait checks its own target.
            lambda name=name: get_num_running_replicas(controller, name) == 0,
            timeout=60,
        )
Beispiel #5
0
    def forward(self, input: ModelInputData):
        """Add ``model_input1``, the length of ``model_input2`` and the weight.

        Reads ``model_input1`` and ``model_input2`` from ``input`` and
        ``weight`` from the instance, and returns their combined sum.
        """
        numeric_part = input.model_input1
        text_length = len(input.model_input2)
        return numeric_part + text_length + self.weight


@serve.deployment
def combine(value_refs):
    """Fetch ``value_refs`` with ``ray.get`` and return the sum of the results."""
    resolved_values = ray.get(value_refs)
    return sum(resolved_values)


# Build the graph: two Model nodes read the same input and their outputs are
# summed by ``combine``; the DAGDriver serves it under /my-dag and receives
# ModelInputData as its http_adapter.
with InputNode() as user_input:
    model1 = Model.bind(0)
    model2 = Model.bind(1)
    output1 = model1.forward.bind(user_input)
    output2 = model2.forward.bind(user_input)
    dag = combine.bind([output1, output2])
    driver = DAGDriver.options(route_prefix="/my-dag")
    serve_dag = driver.bind(dag, http_adapter=ModelInputData)

dag_handle = serve.run(serve_dag)

# Query the graph through the Python handle.
handle_result = ray.get(
    dag_handle.predict.remote(
        ModelInputData(model_input1=1, model_input2="test")))
print(handle_result)

# Query the graph over HTTP with the same field values.
http_response = requests.post(
    "http://127.0.0.1:8000/my-dag",
    json={
        "model_input1": 1,
        "model_input2": "test"
    },
)
print(http_response.text)