Example #1
    async def predict(self, request: InferenceRequest) -> InferenceResponse:

        request_dict = request.dict()

        insights_wrapper = InsightsWrapper(self.insights_manager)
        # TODO: Add request_id, response_headers, request_headers, etc
        payload_context = PayloadContext(request_id=request.id,
                                         request=request_dict)
        tempo_wrapper = TempoContextWrapper(payload_context, insights_wrapper,
                                            self.state)
        tempo_context.set(tempo_wrapper)

        response_dict = self._model.request(request_dict)
        if self._is_coroutine:
            response_dict = await response_dict  # type: ignore

        # TODO: Ensure model_version is added by mlserver
        response_dict["model_version"] = "NOTIMPLEMENTED"

        # TODO: Move to functions declared upfront with logic contained to avoid if
        if self._model.get_insights_mode == InsightRequestModes.ALL:
            insights_wrapper.log(request_dict,
                                 insights_type=InsightsTypes.INFER_REQUEST)
            insights_wrapper.log(response_dict,
                                 insights_type=InsightsTypes.INFER_RESPONSE)
        else:
            if (self._model.get_insights_mode == InsightRequestModes.REQUEST
                    or insights_wrapper.set_log_request):
                insights_wrapper.log(request_dict,
                                     insights_type=InsightsTypes.INFER_REQUEST)
            if (self._model.get_insights_mode == InsightRequestModes.RESPONSE
                    or insights_wrapper.set_log_response):
                insights_wrapper.log(
                    response_dict, insights_type=InsightsTypes.INFER_RESPONSE)

        return InferenceResponse(**response_dict)
Example #2
def xgboost_inference_request(inference_request: InferenceRequest) -> InferenceRequest:
    # Reshape to 2D array, matching the input data to xgboost_model
    single_input = inference_request.inputs[0]
    single_input.data = [[1, 2, 3]]

    # Keep only a single input
    inference_request.inputs = [single_input]

    return inference_request
Example #3
def xgboost_inference_request(
        inference_request: InferenceRequest) -> InferenceRequest:
    # Reshape to 2D array, matching the input data to xgboost_model
    single_input = inference_request.inputs[0]
    single_input.data = TensorData.parse_obj([[1, 2, 3]])
    single_input.shape = [1, 3]

    # Keep only a single input
    inference_request.inputs = [single_input]

    return inference_request
Example #4
def test_content_types(tensor_spec: TensorSpec, request_input: RequestInput):
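    # Decode the pandas-encoded request and validate the result against the input schema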
    input_schema = Schema(inputs=[tensor_spec])

    inference_request = InferenceRequest(
        parameters=Parameters(content_type=PandasCodec.ContentType),
        inputs=[request_input],
    )
    data = decode_inference_request(inference_request)

    # _enforce_schema will raise if something fails
    _enforce_schema(data, input_schema)
Example #5
    async def _send_request():
        # Change the UUID so that it's a new one
        pred_id = generate_uuid()

        # Generate random data to ensure we catch any out-of-order issues
        request_input = inference_request.inputs[0]
        request_input.data = TensorData(
            __root__=[random.randint(1, 100) for _ in range(3)]
        )
        new_req = InferenceRequest(id=pred_id, inputs=[request_input])
        internal_id, _ = await adaptive_batcher._queue_request(new_req)

        return internal_id, new_req
Example #6
def test_decode_request_inputs(sum_model_settings: ModelSettings,
                               request_input: RequestInput, expected: Any):
    request = InferenceRequest(inputs=[request_input])
    request = codec_middleware(request, sum_model_settings)

    if expected is None:
        assert not request.inputs[0].parameters
    else:
        decoded = getattr(request.inputs[0].parameters, DecodedParameterName)
        if isinstance(expected, np.ndarray):
            np.testing.assert_array_equal(decoded, expected)  # type: ignore
        else:
            assert decoded == expected  # type: ignore
Example #7
    def _check_request(
            self, payload: types.InferenceRequest) -> types.InferenceRequest:
        if len(payload.inputs) != 1:
            raise InferenceError(
                "SKLearnModel only supports a single input tensor "
                f"({len(payload.inputs)} were received)")

        if not payload.outputs:
            # By default, only return the result of `predict()`
            payload.outputs = [types.RequestOutput(name=PREDICT_OUTPUT)]
        else:
            for request_output in payload.outputs:
                if request_output.name not in VALID_OUTPUTS:
                    raise InferenceError(
                        f"SKLearnModel only supports '{PREDICT_OUTPUT}' and "
                        f"'{PREDICT_PROBA_OUTPUT}' as outputs "
                        f"({request_output.name} was received)")

        return payload
Example #8
async def test_predict_pytorch(runtime_pytorch: MLflowRuntime):
    # The model used here is the MNIST pytorch example in mlflow:
    # https://github.com/mlflow/mlflow/tree/master/examples/pytorch/MNIST
    # input is a 28*28 image
    data = np.random.randn(1, 28 * 28).astype(np.float32)
    inference_request = InferenceRequest(
        parameters=Parameters(content_type=NumpyCodec.ContentType),
        inputs=[
            RequestInput(
                name="predict",
                shape=data.shape,
                data=data.tolist(),
                datatype="FP32",
            )
        ],
    )
    response = await runtime_pytorch.predict(inference_request)

    outputs = response.outputs
    assert len(outputs) == 1
    assert outputs[0].name == DefaultOutputName
Example #9
    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        print("------ Encoded Input (request) ------")
        as_dict = payload.dict(exclude=_to_exclude)  # type: ignore
        print(json.dumps(as_dict, indent=2))
        print("------ Decoded input (request) ------")
        decoded_request = None
        if payload.parameters:
            decoded_request = getattr(payload.parameters, DecodedParameterName)
        print(decoded_request)

        outputs = []
        for request_input in payload.inputs:
            outputs.append(
                ResponseOutput(
                    name=request_input.name,
                    datatype=request_input.datatype,
                    shape=request_input.shape,
                    data=request_input.data,
                ))

        return InferenceResponse(model_name=self.name, outputs=outputs)
Example #10
    async def predict(self, request: InferenceRequest) -> InferenceResponse:

        insights_wrapper = InsightsWrapper(self.insights_manager)
        insights_context.set(insights_wrapper)

        request_dict = request.dict()

        response_dict = self._model.request(request_dict)
        if self._is_coroutine:
            response_dict = await response_dict  # type: ignore

        # TODO: Move to functions declared upfront with logic contained to avoid if
        if self._model.get_insights_mode == InsightRequestModes.ALL:
            self.insights_manager.log(request_dict)
            self.insights_manager.log(response_dict)
        else:
            if (self._model.get_insights_mode == InsightRequestModes.REQUEST
                    or insights_wrapper.set_log_request):
                self.insights_manager.log(request_dict)
            if (self._model.get_insights_mode == InsightRequestModes.RESPONSE
                    or insights_wrapper.set_log_response):
                self.insights_manager.log(response_dict)

        return InferenceResponse(**response_dict)
Example #11
async def test_batcher_cancels_responses(
    adaptive_batcher: AdaptiveBatcher,
    mocker,
):
    message = "This is an error"

    async def _async_exception():
        raise Exception(message)

    num_requests = adaptive_batcher._max_batch_size * 2 + 2

    adaptive_batcher._batcher = mocker.stub("_batcher")
    adaptive_batcher._batcher.side_effect = iter(_async_exception, None)

    requests = [
        InferenceRequest(
            id=generate_uuid(),
            inputs=[
                RequestInput(
                    name="input-0",
                    shape=[1, 3],
                    datatype="INT32",
                    data=[idx, idx + 1, idx + 2],
                )
            ],
        ) for idx in range(num_requests)
    ]

    responses = await asyncio.gather(
        *[adaptive_batcher.predict(request) for request in requests],
        return_exceptions=True,
    )

    for response in responses:
        assert isinstance(response, Exception)
        assert str(response) == message
Example #12
from mlserver.codecs import NumpyCodec, StringCodec, PandasCodec
from mlserver.codecs.middleware import DecodedParameterName, codec_middleware
from mlserver.settings import ModelSettings


@pytest.mark.parametrize(
    "inference_request, expected",
    [
        (
            InferenceRequest(
                parameters=Parameters(content_type=PandasCodec.ContentType),
                inputs=[
                    RequestInput(
                        name="foo",
                        shape=[2, 2],
                        data=[1, 2, 3, 4],
                        datatype="INT32",
                        parameters=Parameters(
                            content_type=NumpyCodec.ContentType),
                    ),
                ],
            ),
            pd.DataFrame({"foo": [[1, 2], [3, 4]]}),
        ),
        (
            InferenceRequest(
                parameters=Parameters(content_type=PandasCodec.ContentType),
                inputs=[
                    RequestInput(
                        name="foo",
                        shape=[2, 2],
Example #13
    assert merged == expected_request_input
    assert batched._minibatch_sizes == expected_minibatch_sizes


@pytest.mark.parametrize(
    "inference_requests, expected",
    [
        (
            {
                "req-1":
                InferenceRequest(
                    parameters=Parameters(content_type="np"),
                    inputs=[
                        RequestInput(name="foo",
                                     datatype="INT32",
                                     data=[1, 2, 3],
                                     shape=[1, 3])
                    ],
                ),
                "req-2":
                InferenceRequest(
                    parameters=Parameters(foo="bar"),
                    inputs=[
                        RequestInput(name="foo",
                                     datatype="INT32",
                                     data=[4, 5, 6],
                                     shape=[1, 3])
                    ],
                ),
            },
Example #14
def test_first_input_decode(inference_request: InferenceRequest, expected: np.ndarray):
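    # Trim the request to its first input and decode it into a NumPy array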
    inference_request.inputs = [inference_request.inputs[0]]
    first_input = NumpyRequestCodec.decode(inference_request)

    np.testing.assert_equal(first_input, expected)
Example #15
    assert inference_response == expected


@pytest.mark.parametrize(
    "inference_request, expected",
    [
        (
            InferenceRequest(inputs=[
                RequestInput(
                    name="a",
                    data=[1, 2, 3],
                    datatype="FP32",
                    shape=[1, 3],
                    parameters=Parameters(
                        _decoded_payload=np.array([[1, 2, 3]])),
                ),
                RequestInput(
                    name="b",
                    data=b"hello world",
                    datatype="BYTES",
                    shape=[1, 11],
                    parameters=Parameters(_decoded_payload=["hello world"]),
                ),
            ]),
            pd.DataFrame({
                "a": [np.array([1, 2, 3])],
                "b": ["hello world"]
            }),
        ),
        (
            InferenceRequest(inputs=[
Example #16
def inference_request() -> InferenceRequest:
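    # Build a minimal V2 inference request with a single flat FP32 input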
    return InferenceRequest(inputs=[
        RequestInput(
            name="payload", shape=[4], data=[1, 2, 3, 4], datatype="FP32")
    ])
Example #17
def inference_request(model_settings: ModelSettings) -> InferenceRequest:
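    # Load a stored request payload and decode its inputs via the codec middleware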
    payload_path = os.path.join(TESTDATA_PATH, "inference-request.json")
    inference_request = InferenceRequest.parse_file(payload_path)
    return codec_middleware(inference_request, model_settings)
Example #18
    async def predict(self, payload: InferenceRequest) -> InferenceResponse:
        prediction = self._pipeline.request(payload.dict())
        return InferenceResponse(**prediction)
Example #19
from mlserver.codecs.base import CodecError
from mlserver.codecs.utils import (
    FirstInputRequestCodec,
    DecodedParameterName,
)
from mlserver.codecs.numpy import NumpyRequestCodec


@pytest.mark.parametrize(
    "inference_request, expected",
    [
        (
            InferenceRequest(
                inputs=[
                    RequestInput(
                        name="foo", shape=[2, 2], data=[1, 2, 3, 4], datatype="INT32"
                    )
                ]
            ),
            np.array([[1, 2], [3, 4]]),
        ),
        (
            InferenceRequest(
                inputs=[
                    RequestInput(
                        name="foo",
                        shape=[2, 2],
                        data=[1, 2, 3, 4],
                        datatype="INT32",
                        parameters=Parameters(**{DecodedParameterName: np.array([23])}),
                    )
Example #20
    for response in responses:
        assert isinstance(response, Exception)
        assert str(response) == message


@pytest.mark.parametrize(
    "requests",
    [
        [
            InferenceRequest(
                id=f"request-{idx}",
                inputs=[
                    RequestInput(
                        name="input-0",
                        shape=[1, 3],
                        datatype="INT32",
                        data=[idx, idx + 1, idx + 2],
                    )
                ],
            )
            # 10 is the max_batch_size for sum_model
            # Make sure one batch is only half-full
            for idx in range(10 * 2 + 2)
        ],
        [
            InferenceRequest(
                id="large-request",
                inputs=[
                    # 10 is the max batch size, so we send a minibatch with
                    # 20 entries
Example #21
def inference_request() -> InferenceRequest:
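    # Load a serialised inference request from the shared testdata directory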
    payload_path = os.path.join(TESTDATA_PATH, "inference-request.json")
    return InferenceRequest.parse_file(payload_path)
Example #22
def sklearn_inference_request(inference_request: InferenceRequest) -> InferenceRequest:
    # Keep only a single input
    inference_request.inputs = inference_request.inputs[:1]

    return inference_request