Esempio n. 1
0
def test_schema_enforcement_named_tensor_schema_1d():
    m = Model()
    input_schema = Schema([
        TensorSpec(np.dtype(np.uint64), (-1, ), "a"),
        TensorSpec(np.dtype(np.float32), (-1, ), "b")
    ])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    pdf = pd.DataFrame(data=[[0, 0], [1, 1]], columns=["a", "b"])
    pdf["a"] = pdf["a"].astype(np.uint64)
    pdf["b"] = pdf["a"].astype(np.float32)
    d_inp = {
        "a": np.array(pdf["a"], dtype=np.uint64),
        "b": np.array(pdf["b"], dtype=np.float32),
    }

    # test dataframe input works for 1d tensor specs and input is converted to dict
    res = pyfunc_model.predict(pdf)
    assert _compare_exact_tensor_dict_input(res, d_inp)
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test that dictionary works too
    res = pyfunc_model.predict(d_inp)
    assert res == d_inp
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types
Esempio n. 2
0
def test_parse_tf_serving_dictionary():
    def assert_result(result, expected_result):
        assert result.keys() == expected_result.keys()
        for key in result:
            assert (result[key] == expected_result[key]).all()

    # instances are correctly aggregated to dict of input name -> tensor
    tfserving_input = {
        "instances": [
            {"a": "s1", "b": 1.1, "c": [1, 2, 3]},
            {"a": "s2", "b": 2.2, "c": [4, 5, 6]},
            {"a": "s3", "b": 3.3, "c": [7, 8, 9]},
        ]
    }
    # Without Schema
    result = parse_tf_serving_input(tfserving_input)
    expected_result_no_schema = {
        "a": np.array(["s1", "s2", "s3"]),
        "b": np.array([1.1, 2.2, 3.3], dtype="float64"),
        "c": np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int64"),
    }
    assert_result(result, expected_result_no_schema)

    # With schema
    schema = Schema(
        [
            TensorSpec(np.dtype("object"), [-1], "a"),
            TensorSpec(np.dtype("float32"), [-1], "b"),
            TensorSpec(np.dtype("int32"), [-1], "c"),
        ]
    )
    result = parse_tf_serving_input(tfserving_input, schema)
    expected_result_schema = {
        "a": np.array(["s1", "s2", "s3"]),
        "b": np.array([1.1, 2.2, 3.3], dtype="float32"),
        "c": np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int32"),
    }
    assert_result(result, expected_result_schema)

    # input provided as a dict
    tfserving_input = {
        "inputs": {
            "a": ["s1", "s2", "s3"],
            "b": [1.1, 2.2, 3.3],
            "c": [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        }
    }
    # Without Schema
    result = parse_tf_serving_input(tfserving_input)
    assert_result(result, expected_result_no_schema)

    # With Schema
    result = parse_tf_serving_input(tfserving_input, schema)
    assert_result(result, expected_result_schema)
Esempio n. 3
0
def test_schema_enforcement_single_named_tensor_schema():
    m = Model()
    input_schema = Schema([TensorSpec(np.dtype(np.uint64), (-1, 2), "a")])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    inp = {
        "a": np.array([[0, 0], [1, 1]], dtype=np.uint64),
    }

    # sanity test that dictionary with correct input works
    res = pyfunc_model.predict(inp)
    assert res == inp
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test single np.ndarray input works and is converted to dictionary
    res = pyfunc_model.predict(inp["a"])
    assert res == inp
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test list does not work
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict([[0, 0], [1, 1]])
    assert "Model is missing inputs ['a']" in str(ex)
def test_tensor_schema_enforcement_no_col_names():
    m = Model()
    input_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 3))])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    test_data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)

    # Can call with numpy array of correct shape
    assert np.array_equal(pyfunc_model.predict(test_data), test_data)

    # Or can call with a dataframe
    assert np.array_equal(pyfunc_model.predict(pd.DataFrame(test_data)),
                          test_data)

    # Can not call with a list
    with pytest.raises(
            MlflowException,
            match=
            "This model contains a tensor-based model signature with no input names",
    ):
        pyfunc_model.predict([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

    # Can not call with a dict
    with pytest.raises(
            MlflowException,
            match=
            "This model contains a tensor-based model signature with no input names",
    ):
        pyfunc_model.predict({"blah": test_data})

    # Can not call with a np.ndarray of a wrong shape
    with pytest.raises(
            MlflowException,
            match=re.escape(
                "Shape of input (2, 2) does not match expected shape (-1, 3)"),
    ):
        pyfunc_model.predict(np.array([[1.0, 2.0], [4.0, 5.0]]))

    # Can not call with a np.ndarray of a wrong type
    with pytest.raises(
            MlflowException,
            match="dtype of input uint32 does not match expected dtype float32"
    ):
        pyfunc_model.predict(test_data.astype(np.uint32))

    # Can call with a np.ndarray with more elements along variable axis
    test_data2 = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
                          dtype=np.float32)
    assert np.array_equal(pyfunc_model.predict(test_data2), test_data2)

    # Can not call with an empty ndarray
    with pytest.raises(
            MlflowException,
            match=re.escape(
                "Shape of input () does not match expected shape (-1, 3)")):
        pyfunc_model.predict(np.ndarray([]))
Esempio n. 5
0
def test_parse_tf_serving_single_array():
    def assert_result(result, expected_result):
        assert (result == expected_result).all()

    # values for each column are properly converted to a tensor
    arr = [
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        [[3, 2, 1], [6, 5, 4], [9, 8, 7]],
    ]
    tfserving_instances = {"instances": arr}
    tfserving_inputs = {"inputs": arr}

    # Without schema
    instance_result = parse_tf_serving_input(tfserving_instances)
    assert instance_result.shape == (2, 3, 3)
    assert_result(instance_result, np.array(arr, dtype="int64"))

    input_result = parse_tf_serving_input(tfserving_inputs)
    assert input_result.shape == (2, 3, 3)
    assert_result(input_result, np.array(arr, dtype="int64"))

    # Unnamed schema
    schema = Schema([TensorSpec(np.dtype("float32"), [-1])])
    instance_result = parse_tf_serving_input(tfserving_instances, schema)
    assert_result(instance_result, np.array(arr, dtype="float32"))

    input_result = parse_tf_serving_input(tfserving_inputs, schema)
    assert_result(input_result, np.array(arr, dtype="float32"))

    # named schema
    schema = Schema([TensorSpec(np.dtype("float32"), [-1], "a")])
    instance_result = parse_tf_serving_input(tfserving_instances, schema)
    assert isinstance(instance_result, dict)
    assert len(instance_result.keys()) == 1 and "a" in instance_result
    assert_result(instance_result["a"], np.array(arr, dtype="float32"))

    input_result = parse_tf_serving_input(tfserving_inputs, schema)
    assert isinstance(input_result, dict)
    assert len(input_result.keys()) == 1 and "a" in input_result
    assert_result(input_result["a"], np.array(arr, dtype="float32"))
Esempio n. 6
0
def test_tensor_multi_named_schema_enforcement():
    m = Model()
    input_schema = Schema([
        TensorSpec(np.dtype(np.uint64), (-1, 5), "a"),
        TensorSpec(np.dtype(np.short), (-1, 2), "b"),
        TensorSpec(np.dtype(np.float32), (2, -1, 2), "c"),
    ])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    inp = {
        "a": np.array([[0, 0, 0, 0, 0], [1, 1, 1, 1, 1]], dtype=np.uint64),
        "b": np.array([[0, 0], [1, 1], [2, 2]], dtype=np.short),
        "c": np.array([[[0, 0], [1, 1]], [[2, 2], [3, 3]]], dtype=np.float32),
    }

    # test that missing column raises
    inp1 = {k: v for k, v in inp.items()}
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp1.pop("b"))
    assert "Model is missing inputs" in str(ex)

    # test that extra column is ignored
    inp2 = {k: v for k, v in inp.items()}
    inp2["x"] = 1

    # test that extra column is removed
    res = pyfunc_model.predict(inp2)
    assert res == {k: v for k, v in inp.items() if k in {"a", "b", "c"}}
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test that variable axes are supported
    inp3 = {
        "a":
        np.array([[0, 0, 0, 0, 0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]],
                 dtype=np.uint64),
        "b":
        np.array([[0, 0], [1, 1]], dtype=np.short),
        "c":
        np.array([[[0, 0]], [[2, 2]]], dtype=np.float32),
    }
    res = pyfunc_model.predict(inp3)
    assert _compare_exact_tensor_dict_input(res, inp3)
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.input_types()))
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test that type casting is not supported
    inp4 = {k: v for k, v in inp.items()}
    inp4["a"] = inp4["a"].astype(np.int32)
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp4)
    assert "dtype of input int32 does not match expected dtype uint64" in str(
        ex)

    # test wrong shape
    inp5 = {
        "a": np.array([[0, 0, 0, 0]], dtype=np.uint),
        "b": np.array([[0, 0], [1, 1]], dtype=np.short),
        "c": np.array([[[0, 0]]], dtype=np.float32),
    }
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp5)
    assert "Shape of input (1, 4) does not match expected shape (-1, 5)" in str(
        ex)

    # test non-dictionary input
    inp6 = [
        np.array([[0, 0, 0, 0, 0]], dtype=np.uint64),
        np.array([[0, 0], [1, 1]], dtype=np.short),
        np.array([[[0, 0]]], dtype=np.float32),
    ]
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp6)
    assert "Model is missing inputs ['a', 'b', 'c']." in str(ex)

    # test empty ndarray does not work
    inp7 = {k: v for k, v in inp.items()}
    inp7["a"] = np.array([])
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp7)
    assert "Shape of input (0,) does not match expected shape" in str(ex)

    # test dictionary of str -> list does not work
    inp8 = {k: list(v) for k, v in inp.items()}
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp8)
    assert "This model contains a tensor-based model signature with input names" in str(
        ex)
    assert (
        "suggests a dictionary input mapping input name to a numpy array, but a dict"
        " with value type <class 'list'> was found") in str(ex)

    # test dataframe input fails at shape enforcement
    pdf = pd.DataFrame(
        data=[[1, 2, 3]],
        columns=["a", "b", "c"],
    )
    pdf["a"] = pdf["a"].astype(np.uint64)
    pdf["b"] = pdf["b"].astype(np.short)
    pdf["c"] = pdf["c"].astype(np.float32)
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(pdf)
    assert "Shape of input (1,) does not match expected shape (-1, 5)" in str(
        ex)
Esempio n. 7
0
def test_dataframe_from_json():
    source = pd.DataFrame(
        {
            "boolean": [True, False, True],
            "string": ["a", "b", "c"],
            "float": np.array([1.2, 2.3, 3.4], dtype=np.float32),
            "double": np.array([1.2, 2.3, 3.4], dtype=np.float64),
            "integer": np.array([3, 4, 5], dtype=np.int32),
            "long": np.array([3, 4, 5], dtype=np.int64),
            "binary": [bytes([1, 2, 3]),
                       bytes([4, 5]),
                       bytes([6])],
            "date_string": ["2018-02-03", "1996-03-02", "2021-03-05"],
        },
        columns=[
            "boolean",
            "string",
            "float",
            "double",
            "integer",
            "long",
            "binary",
            "date_string",
        ],
    )

    jsonable_df = pd.DataFrame(source, copy=True)
    jsonable_df["binary"] = jsonable_df["binary"].map(base64.b64encode)
    schema = Schema([
        ColSpec("boolean", "boolean"),
        ColSpec("string", "string"),
        ColSpec("float", "float"),
        ColSpec("double", "double"),
        ColSpec("integer", "integer"),
        ColSpec("long", "long"),
        ColSpec("binary", "binary"),
        ColSpec("string", "date_string"),
    ])
    parsed = _dataframe_from_json(jsonable_df.to_json(orient="split"),
                                  pandas_orient="split",
                                  schema=schema)
    assert parsed.equals(source)
    parsed = _dataframe_from_json(jsonable_df.to_json(orient="records"),
                                  pandas_orient="records",
                                  schema=schema)
    assert parsed.equals(source)
    # try parsing with tensor schema
    tensor_schema = Schema([
        TensorSpec(np.dtype("bool"), [-1], "boolean"),
        TensorSpec(np.dtype("str"), [-1], "string"),
        TensorSpec(np.dtype("float32"), [-1], "float"),
        TensorSpec(np.dtype("float64"), [-1], "double"),
        TensorSpec(np.dtype("int32"), [-1], "integer"),
        TensorSpec(np.dtype("int64"), [-1], "long"),
        TensorSpec(np.dtype(bytes), [-1], "binary"),
    ])
    parsed = _dataframe_from_json(jsonable_df.to_json(orient="split"),
                                  pandas_orient="split",
                                  schema=tensor_schema)

    # NB: tensor schema does not automatically decode base64 encoded bytes.
    assert parsed.equals(jsonable_df)
    parsed = _dataframe_from_json(jsonable_df.to_json(orient="records"),
                                  pandas_orient="records",
                                  schema=tensor_schema)

    # NB: tensor schema does not automatically decode base64 encoded bytes.
    assert parsed.equals(jsonable_df)

    # Test parse with TesnorSchema with a single tensor
    tensor_schema = Schema([TensorSpec(np.dtype("float32"), [-1, 3])])
    source = pd.DataFrame(
        {
            "a": np.array([1, 2, 3], dtype=np.float32),
            "b": np.array([4.1, 5.2, 6.3], dtype=np.float32),
            "c": np.array([7, 8, 9], dtype=np.float32),
        },
        columns=["a", "b", "c"],
    )
    assert source.equals(
        _dataframe_from_json(source.to_json(orient="split"),
                             pandas_orient="split",
                             schema=tensor_schema))
    assert source.equals(
        _dataframe_from_json(source.to_json(orient="records"),
                             pandas_orient="records",
                             schema=tensor_schema))