def test_split_oriented_json_to_numpy_array():
    """Check the per-column dtypes produced from split-oriented JSON input.

    The ``zip`` values are JSON strings that consist only of digits; they
    must keep the ``object`` dtype instead of being converted to ``int64``.
    """
    jstr = '{"columns":["zip","cost","count"],"index":[0,1,2],' \
           '"data":[["95120",10.45,-8],["95128",23.0,-1],["95128",12.1,1000]]}'
    df = pyfunc_scoring_server.parse_split_oriented_json_input_to_numpy(jstr)

    assert set(df.columns) == {'zip', 'cost', 'count'}
    # Compare dtypes column by column: a bare set of dtype names would still
    # match if the dtypes were attached to the wrong columns (e.g. "zip"
    # parsed as int64 while "count" became object).
    dtypes_by_column = {column: str(dtype) for column, dtype in df.dtypes.items()}
    assert dtypes_by_column == {
        'zip': 'object',
        'cost': 'float64',
        'count': 'int64',
    }
Ejemplo n.º 2
0
    async def invocations(self, request: Request) -> Response:
        """
        Parse the request payload, run it through the model and return the
        predictions as JSON.

        This custom handler is meant to mimic the behaviour of the existing
        scoring server in MLflow.
        For details about its implementation, please consult the original
        implementation in the MLflow repository:

            https://github.com/mlflow/mlflow/blob/master/mlflow/pyfunc/scoring_server/__init__.py

        The request body is decoded as UTF-8 and parsed according to the
        ``content-type`` header, which is compared for exact equality against
        the supported content-type constants (CSV, JSON, split-oriented JSON,
        records-oriented JSON and split-oriented JSON parsed to numpy).

        :param request: Incoming HTTP request carrying the serialised input.
        :return: A ``Response`` with media type ``application/json`` whose
            content is whatever ``predictions_to_json`` wrote for the model
            output.
        :raises InferenceError: If the content type is not one of the
            supported ones, or if the model fails while predicting. In the
            latter case the original exception is attached as the cause.
        """
        content_type = request.headers.get("content-type", None)
        raw_data = await request.body()
        as_str = raw_data.decode("utf-8")

        if content_type == CONTENT_TYPE_CSV:
            csv_input = StringIO(as_str)
            data = parse_csv_input(csv_input=csv_input)
        elif content_type == CONTENT_TYPE_JSON:
            data = infer_and_parse_json_input(as_str, self._input_schema)
        elif content_type == CONTENT_TYPE_JSON_SPLIT_ORIENTED:
            data = parse_json_input(
                json_input=StringIO(as_str),
                orient="split",
                schema=self._input_schema,
            )
        elif content_type == CONTENT_TYPE_JSON_RECORDS_ORIENTED:
            data = parse_json_input(
                json_input=StringIO(as_str),
                orient="records",
                schema=self._input_schema,
            )
        elif content_type == CONTENT_TYPE_JSON_SPLIT_NUMPY:
            data = parse_split_oriented_json_input_to_numpy(as_str)
        else:
            content_type_error_message = (
                "This predictor only supports the following content types, "
                f"{CONTENT_TYPES}. Got '{content_type}'.")
            raise InferenceError(content_type_error_message)

        try:
            raw_predictions = self._model.predict(data)
        except MlflowException as e:
            # Chain explicitly so the MLflow error is kept as the direct cause
            # of the InferenceError instead of only as implicit context.
            raise InferenceError(e.message) from e
        except Exception as e:
            error_message = (
                "Encountered an unexpected error while evaluating the model. Verify"
                " that the serialized input Dataframe is compatible with the model for"
                " inference.")
            # The message sent to the caller is deliberately generic; the
            # underlying exception stays reachable through ``__cause__``.
            raise InferenceError(error_message) from e

        result = StringIO()
        predictions_to_json(raw_predictions, result)
        return Response(content=result.getvalue(),
                        media_type="application/json")
def test_parse_json_input_split_oriented_to_numpy_array():
    """Round-trip random mixed-type columns through split-oriented JSON.

    A frame with two integer columns and one string column is serialised with
    ``orient="split"``; the parsed result is compared element-wise against a
    frame built from an object-dtype array of the same rows.
    """
    n_rows = 200

    # Generate the columns in a fixed order: col_m, col_z, col_a.
    columns = OrderedDict()
    columns["col_m"] = [random_int(0, 1000) for _ in range(n_rows)]
    columns["col_z"] = [random_str(4) for _ in range(n_rows)]
    columns["col_a"] = [random_int() for _ in range(n_rows)]
    source_frame = pd.DataFrame.from_dict(columns)

    # Expected frame: same values laid out row by row in an object array,
    # with dtypes re-inferred per column afterwards.
    rows = [
        list(row)
        for row in zip(columns["col_m"], columns["col_z"], columns["col_a"])
    ]
    expected = pd.DataFrame(np.array(rows, dtype=object)).infer_objects()

    payload = source_frame.to_json(orient="split")
    actual = pyfunc_scoring_server.parse_split_oriented_json_input_to_numpy(payload)

    np.testing.assert_array_equal(expected, actual)