Example No. 1
    async def predict(
            self, inference_request: InferenceRequest) -> InferenceResponse:
        outputs = None
        if self._tokenizer_type == TOKENIZER_TYPE_ENCODE:
            sentences = StringRequestCodec.decode(inference_request)
            tokenised = self._tokeniser(sentences, return_tensors="np")

            outputs = []
            for name, payload in tokenised.items():
                inference_output = NumpyCodec.encode(name=name,
                                                     payload=payload)
                # The Transformers TF GPT-2 model expects `INT32` inputs by
                # default, so enforce that datatype here
                inference_output.datatype = "INT32"
                outputs.append(inference_output)
        else:
            logits = NumpyCodec.decode(inference_request.inputs[0])
            # Take the most likely next token at the last input position (greedy decoding)
            next_token = logits.argmax(axis=2)[0]
            next_token_str = self._tokeniser.decode(
                next_token[-1:],
                skip_special_tokens=True,
                clean_up_tokenization_spaces=True).strip()
            outputs = [StringCodec.encode("next_token", [next_token_str])]

        return InferenceResponse(model_name=self.name,
                                 model_version=self.version,
                                 outputs=outputs)
Example No. 2
def to_outputs(mlflow_payload: MLflowPayload) -> List[ResponseOutput]:
    codec = NumpyCodec()

    mlflow_payload = _convert_to_tensor_data_if_raw(mlflow_payload)

    return [
        codec.encode(key, value)
        for key, value in mlflow_payload.items()  # type: ignore
    ]
Example No. 3
def to_outputs(mlflow_payload: MLflowPayload) -> List[ResponseOutput]:
    codec = NumpyCodec()

    if type(mlflow_payload) is np.ndarray:
        # Cast to dict of tensors
        mlflow_payload = {DefaultOutputName: mlflow_payload}  # type: ignore

    return [
        codec.encode(key, value)
        for key, value in mlflow_payload.items()  # type: ignore
    ]
Example No. 4
def test_numpy_codec(request_input, payload):
    codec = NumpyCodec()
    decoded = codec.decode(request_input)

    np.testing.assert_array_equal(decoded, payload)

    response_output = codec.encode(name="foo", payload=decoded)

    assert response_output.datatype == request_input.datatype
    assert response_output.shape == request_input.shape
    assert response_output.data == request_input.data
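The request_input and payload fixtures are not shown in the snippet above; one plausible parametrisation (values purely illustrative) pairs a flat V2 tensor with its NumPy equivalent:

import numpy as np

from mlserver.types import RequestInput

# Hypothetical fixture pair: a 2x2 FP32 tensor in V2 form and as a NumPy array
request_input = RequestInput(
    name="foo",
    datatype="FP32",
    shape=[2, 2],
    data=[1.0, 2.0, 3.0, 4.0],
)
payload = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)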
Example No. 5
    async def predict(
            self, payload: types.InferenceRequest) -> types.InferenceResponse:
        payload = self._check_request(payload)
        model_input = payload.inputs[0]
        default_codec = NumpyCodec()
        input_data = self.decode(model_input, default_codec=default_codec)
        y = await self.predict_fn(input_data)

        # TODO: Convert alibi-detect output to v2 protocol
        output_data = np.array(y["data"])

        return types.InferenceResponse(
            model_name=self.name,
            model_version=self.version,
            parameters=y["meta"],
            outputs=[default_codec.encode(name="detect", payload=output_data)],
        )
Example No. 6
    def _predict_outputs(
        self, payload: types.InferenceRequest
    ) -> List[types.ResponseOutput]:
        model_input = payload.inputs[0]

        default_codec = NumpyCodec()
        input_data = self.decode(model_input, default_codec=default_codec)

        outputs = []
        for request_output in payload.outputs:  # type: ignore
            predict_fn = getattr(self._model, request_output.name)
            y = predict_fn(input_data)

            # TODO: Set datatype (cast from numpy?)
            response_output = default_codec.encode(name=request_output.name, payload=y)
            outputs.append(response_output)

        return outputs
Example No. 7
class LightGBMModel(MLModel):
    """
    Implementation of the MLModel interface to load and serve `lightgbm` models.
    """
    async def load(self) -> bool:
        model_uri = await get_model_uri(
            self._settings, wellknown_filenames=WELLKNOWN_MODEL_FILENAMES)

        self._model = lgb.Booster(model_file=model_uri)

        self._codec = NumpyCodec()

        self.ready = True
        return self.ready

    async def predict(
            self, payload: types.InferenceRequest) -> types.InferenceResponse:
        payload = self._check_request(payload)

        model_input = payload.inputs[0]
        decoded = self.decode(model_input, default_codec=self._codec)
        prediction = self._model.predict(decoded)

        return types.InferenceResponse(
            model_name=self.name,
            model_version=self.version,
            outputs=[self._codec.encode(name="predict", payload=prediction)],
        )

    def _check_request(
            self, payload: types.InferenceRequest) -> types.InferenceRequest:
        if len(payload.inputs) != 1:
            raise InferenceError(
                "LightGBM only supports a single input tensor "
                f"({len(payload.inputs)} were received)")

        return payload
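A minimal sketch of exercising the LightGBM runtime above directly, assuming the booster was trained on four numeric features; the tensor name, shape and values are illustrative:

from mlserver.types import InferenceRequest, RequestInput

# Illustrative single-row request with four FP32 features
request = InferenceRequest(
    inputs=[
        RequestInput(
            name="input-0",
            datatype="FP32",
            shape=[1, 4],
            data=[5.1, 3.5, 1.4, 0.2],
        )
    ]
)

# Inside an async context, after `await model.load()`:
# response = await model.predict(request)
# The single output tensor is named "predict", as encoded in the snippet above.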