def test_schema_enforcement_named_tensor_schema_1d():
    """Check 1-D named tensor specs with both DataFrame and dict input.

    A schema with two named 1-D tensors ("a": uint64, "b": float32) is
    attached to a pyfunc model. The test asserts that the value returned by
    ``predict`` is a dict of arrays equal to the expected one and that each
    array carries the dtype declared in the schema.

    NOTE(review): relies on the module-level ``TestModel`` (not visible in
    this block) handing the enforced input back unchanged -- confirm.
    """
    m = Model()
    input_schema = Schema([
        TensorSpec(np.dtype(np.uint64), (-1, ), "a"),
        TensorSpec(np.dtype(np.float32), (-1, ), "b"),
    ])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())

    pdf = pd.DataFrame(data=[[0, 0], [1, 1]], columns=["a", "b"])
    pdf["a"] = pdf["a"].astype(np.uint64)
    # Cast column "b" from itself; the original cast "a" a second time, which
    # only worked because both columns hold the same values.
    pdf["b"] = pdf["b"].astype(np.float32)
    d_inp = {
        "a": np.array(pdf["a"], dtype=np.uint64),
        "b": np.array(pdf["b"], dtype=np.float32),
    }
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.input_types()))

    # test dataframe input works for 1d tensor specs and input is converted to dict
    res = pyfunc_model.predict(pdf)
    assert _compare_exact_tensor_dict_input(res, d_inp)
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test that dictionary works too
    res = pyfunc_model.predict(d_inp)
    # Compare element-wise: ``res == d_inp`` on dicts of ndarrays only
    # succeeds when the very same array objects are returned, and raises
    # ValueError otherwise.
    assert _compare_exact_tensor_dict_input(res, d_inp)
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types
def test_missing_value_hint_is_displayed_when_it_should():
    """Check when the "missing values" hint appears in type-mismatch errors.

    The schema declares a single integer column "a". Three inputs are tried:

    * ``[1, None]``   -- the error must contain the hint;
    * ``[1.5, None]`` -- the error must not contain the hint;
    * ``[1, 2]`` built as float64 -- the error must not contain the hint.

    All three must raise ``MlflowException`` with "Incompatible input types".
    """
    m = Model()
    input_schema = Schema([ColSpec("integer", "a")])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    hint = "Hint: the type mismatch is likely caused by missing values."

    pdf = pd.DataFrame(
        data=[[1], [None]],
        columns=["a"],
    )
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(pdf)
    assert "Incompatible input types" in str(ex.value.message)
    assert hint in str(ex.value.message)

    pdf = pd.DataFrame(
        data=[[1.5], [None]],
        columns=["a"],
    )
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(pdf)
    # Inspect the exception message like every other assertion here; the
    # original checked str(ex), i.e. the pytest ExceptionInfo wrapper.
    assert "Incompatible input types" in str(ex.value.message)
    assert hint not in str(ex.value.message)

    pdf = pd.DataFrame(data=[[1], [2]], columns=["a"], dtype=np.float64)
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(pdf)
    assert "Incompatible input types" in str(ex.value.message)
    assert hint not in str(ex.value.message)
def test_schema_enforcement_single_named_tensor_schema():
    """Check enforcement for a schema with exactly one named tensor input.

    The schema declares one uint64 tensor "a" of shape (-1, 2). The test
    asserts that a dict input and a bare ndarray input both come back as a
    dict keyed by "a" with the declared dtype, and that a plain list input
    raises ``MlflowException`` reporting the missing input.

    NOTE(review): relies on the module-level ``TestModel`` (not visible in
    this block) handing the enforced input back unchanged -- confirm.
    """
    m = Model()
    input_schema = Schema([TensorSpec(np.dtype(np.uint64), (-1, 2), "a")])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    inp = {
        "a": np.array([[0, 0], [1, 1]], dtype=np.uint64),
    }
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.input_types()))

    # sanity test that dictionary with correct input works
    res = pyfunc_model.predict(inp)
    # Compare keys and array contents explicitly: ``res == inp`` on dicts of
    # ndarrays only works when the identical array object is returned.
    assert res.keys() == inp.keys()
    assert np.array_equal(res["a"], inp["a"])
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test single np.ndarray input works and is converted to dictionary
    res = pyfunc_model.predict(inp["a"])
    assert res.keys() == inp.keys()
    assert np.array_equal(res["a"], inp["a"])
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test list does not work
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict([[0, 0], [1, 1]])
    # Match against the exception itself, not the pytest ExceptionInfo
    # wrapper, whose string form is a (possibly truncated) repr.
    assert "Model is missing inputs ['a']" in str(ex.value)
def test_schema_enforcement_no_col_names():
    """Check enforcement of a column schema whose columns have no names.

    The schema is three unnamed "double" columns and the local ``TestModel``
    returns its input unchanged. Lists and DataFrames (with or without
    column names) must pass through; a wrong column count, wrong dtypes, or
    an unsupported container type must raise ``MlflowException``.
    """

    class TestModel:
        @staticmethod
        def predict(pdf):
            return pdf

    m = Model()
    input_schema = Schema(
        [ColSpec("double"), ColSpec("double"), ColSpec("double")])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    test_data = [[1.0, 2.0, 3.0]]

    # Can call with just a list
    assert pyfunc_model.predict(test_data).equals(pd.DataFrame(test_data))

    # Or can call with a DataFrame without column names
    assert pyfunc_model.predict(pd.DataFrame(test_data)).equals(
        pd.DataFrame(test_data))

    # Or with column names!
    pdf = pd.DataFrame(data=test_data, columns=["a", "b", "c"])
    assert pyfunc_model.predict(pdf).equals(pdf)

    # Error messages are matched against the exception (ex.value) rather than
    # the pytest ExceptionInfo wrapper, whose str() is a truncated repr.

    # Must provide the right number of arguments
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict([[1.0, 2.0]])
    assert "the provided input only has 2 columns." in str(ex.value)

    # Must provide the right types
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict([[1, 2, 3]])
    assert "Can not safely convert int64 to float64" in str(ex.value)

    # Can only provide data frames or lists...
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict({1, 2, 3})
    assert "Expected input to be DataFrame or list. Found: set" in str(
        ex.value)
def test_schema_enforcement():
    """Check column-schema enforcement: ordering, extra columns, conversions.

    The schema has seven named columns (integer, long, float, double,
    boolean, string, binary) and the local ``TestModel`` returns its input
    unchanged. The test verifies that a missing column raises, that extra
    columns are dropped and columns are reordered, and then walks through a
    series of dtype changes, restoring the frame to a valid state after
    each one so every check isolates a single column.
    """

    class TestModel:
        @staticmethod
        def predict(pdf):
            return pdf

    m = Model()
    input_schema = Schema([
        ColSpec("integer", "a"),
        ColSpec("long", "b"),
        ColSpec("float", "c"),
        ColSpec("double", "d"),
        ColSpec("boolean", "e"),
        ColSpec("string", "g"),
        ColSpec("binary", "f"),
    ])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())

    def assert_incompatible():
        # Predict on the current state of ``pdf`` and require a type error.
        # The message is read from the exception itself (ex.value), not from
        # the pytest ExceptionInfo wrapper.
        with pytest.raises(MlflowException) as ex:
            pyfunc_model.predict(pdf)
        assert "Incompatible input types" in str(ex.value)

    # ``object`` replaces the ``np.object`` alias, which newer NumPy releases
    # no longer provide.
    pdf = pd.DataFrame(
        data=[[1, 2, 3, 4, True, "x", bytes([1])]],
        columns=["b", "d", "a", "c", "e", "g", "f"],
        dtype=object,
    )
    pdf["a"] = pdf["a"].astype(np.int32)
    pdf["b"] = pdf["b"].astype(np.int64)
    pdf["c"] = pdf["c"].astype(np.float32)
    pdf["d"] = pdf["d"].astype(np.float64)

    # test that missing column raises
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(pdf[["b", "d", "a", "e", "g", "f"]])
    assert "Model input is missing columns" in str(ex.value)

    # test that extra column is ignored
    pdf["x"] = 1

    # test that columns are reordered, extra column is ignored
    res = pyfunc_model.predict(pdf)
    assert all((res == pdf[input_schema.column_names()]).all())
    expected_types = dict(
        zip(input_schema.column_names(), input_schema.pandas_types()))
    actual_types = res.dtypes.to_dict()
    assert expected_types == actual_types

    # Test conversions
    # 1. long -> integer raises
    pdf["a"] = pdf["a"].astype(np.int64)
    assert_incompatible()
    pdf["a"] = pdf["a"].astype(np.int32)

    # 2. integer -> long works
    pdf["b"] = pdf["b"].astype(np.int32)
    res = pyfunc_model.predict(pdf)
    assert all((res == pdf[input_schema.column_names()]).all())
    assert res.dtypes.to_dict() == expected_types
    pdf["b"] = pdf["b"].astype(np.int64)

    # 3. double -> float raises
    pdf["c"] = pdf["c"].astype(np.float64)
    assert_incompatible()
    pdf["c"] = pdf["c"].astype(np.float32)

    # 4. float -> double works
    pdf["d"] = pdf["d"].astype(np.float32)
    res = pyfunc_model.predict(pdf)
    assert res.dtypes.to_dict() == expected_types
    # Restore "d" to its valid dtype so the next check isolates column "c".
    pdf["d"] = pdf["d"].astype(np.float64)

    # 5. ints -> floats raises (integer data in the float/double columns)
    pdf["c"] = pdf["c"].astype(np.int32)
    assert_incompatible()
    pdf["c"] = pdf["c"].astype(np.float32)

    pdf["d"] = pdf["d"].astype(np.int64)
    assert_incompatible()
    pdf["d"] = pdf["d"].astype(np.float64)

    # 6. floats -> ints raises (float data in the integer/long columns)
    pdf["a"] = pdf["a"].astype(np.float32)
    assert_incompatible()
    pdf["a"] = pdf["a"].astype(np.int32)

    pdf["b"] = pdf["b"].astype(np.float64)
    assert_incompatible()
    pdf["b"] = pdf["b"].astype(np.int64)

    # 7. objects work
    pdf["b"] = pdf["b"].astype(object)
    pdf["d"] = pdf["d"].astype(object)
    pdf["e"] = pdf["e"].astype(object)
    pdf["f"] = pdf["f"].astype(object)
    pdf["g"] = pdf["g"].astype(object)
    res = pyfunc_model.predict(pdf)
    assert res.dtypes.to_dict() == expected_types
def test_tensor_schema_enforcement_no_col_names():
    """Check enforcement of a single unnamed tensor spec.

    The schema is one float32 tensor of shape (-1, 3). ndarray and DataFrame
    inputs of the right shape must be accepted; lists, dicts, wrong shapes,
    wrong dtypes and a zero-dimensional array must raise ``MlflowException``
    with the corresponding message.

    NOTE(review): relies on the module-level ``TestModel`` (not visible in
    this block) handing the enforced input back unchanged -- confirm.
    """
    m = Model()
    input_schema = Schema([TensorSpec(np.dtype(np.float32), (-1, 3))])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    test_data = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float32)

    # Can call with numpy array of correct shape
    assert np.array_equal(pyfunc_model.predict(test_data), test_data)

    # Or can call with a dataframe
    assert np.array_equal(pyfunc_model.predict(pd.DataFrame(test_data)),
                          test_data)

    # Error messages below are matched against the exception (ex.value)
    # rather than the pytest ExceptionInfo wrapper, whose str() is a
    # truncated repr and not the message itself.

    # Can not call with a list
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    assert "This model contains a tensor-based model signature with no input names" in str(
        ex.value)

    # Can not call with a dict
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict({"blah": test_data})
    assert "This model contains a tensor-based model signature with no input names" in str(
        ex.value)

    # Can not call with a np.ndarray of a wrong shape
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(np.array([[1.0, 2.0], [4.0, 5.0]]))
    assert "Shape of input (2, 2) does not match expected shape (-1, 3)" in str(
        ex.value)

    # Can not call with a np.ndarray of a wrong type
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(test_data.astype(np.uint32))
    assert "dtype of input uint32 does not match expected dtype float32" in str(
        ex.value)

    # Can call with a np.ndarray with more elements along variable axis
    test_data2 = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
                          dtype=np.float32)
    assert np.array_equal(pyfunc_model.predict(test_data2), test_data2)

    # Can not call with an empty ndarray
    # (np.ndarray([]) builds an array whose shape is the empty tuple)
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(np.ndarray([]))
    assert "Shape of input () does not match expected shape (-1, 3)" in str(
        ex.value)
def test_column_schema_enforcement_no_col_names():
    """Check enforcement of a column schema whose columns have no names.

    The schema is three unnamed "double" columns. Lists, DataFrames (with or
    without column names), ndarrays and dicts of lists must be accepted and
    come back equal to the equivalent DataFrame; a wrong column count, wrong
    dtypes, or an unsupported container type must raise ``MlflowException``.

    NOTE(review): relies on the module-level ``TestModel`` (not visible in
    this block) handing the enforced input back unchanged -- confirm.
    """
    m = Model()
    input_schema = Schema(
        [ColSpec("double"), ColSpec("double"), ColSpec("double")])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    test_data = [[1.0, 2.0, 3.0]]

    # Can call with just a list
    assert pyfunc_model.predict(test_data).equals(pd.DataFrame(test_data))

    # Or can call with a DataFrame without column names
    assert pyfunc_model.predict(pd.DataFrame(test_data)).equals(
        pd.DataFrame(test_data))

    # Or can call with a np.ndarray
    assert pyfunc_model.predict(pd.DataFrame(test_data).values).equals(
        pd.DataFrame(test_data))

    # Or with column names!
    pdf = pd.DataFrame(data=test_data, columns=["a", "b", "c"])
    assert pyfunc_model.predict(pdf).equals(pdf)

    # Error messages are matched against the exception (ex.value) rather than
    # the pytest ExceptionInfo wrapper, whose str() is a truncated repr.

    # Must provide the right number of arguments
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict([[1.0, 2.0]])
    assert "the provided value only has 2 inputs." in str(ex.value)

    # Must provide the right types
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict([[1, 2, 3]])
    assert "Can not safely convert int64 to float64" in str(ex.value)

    # Can only provide data type that can be converted to dataframe...
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict({1, 2, 3})
    assert "Expected input to be DataFrame or list. Found: set" in str(
        ex.value)

    # Dictionaries of str -> list/nparray work
    d = {"a": [1.0], "b": [2.0], "c": [3.0]}
    assert pyfunc_model.predict(d).equals(pd.DataFrame(d))
def test_tensor_multi_named_schema_enforcement():
    """Check enforcement for a schema with several named tensor inputs.

    The schema declares three tensors: "a" uint64 (-1, 5), "b" short (-1, 2)
    and "c" float32 (2, -1, 2). The test covers a missing input, an extra
    input, variable-length axes, dtype mismatch, shape mismatch, non-dict
    input, an empty array, dict-of-list input and DataFrame input.

    NOTE(review): relies on the module-level ``TestModel`` (not visible in
    this block) handing the enforced input back unchanged -- confirm.
    """
    m = Model()
    input_schema = Schema([
        TensorSpec(np.dtype(np.uint64), (-1, 5), "a"),
        TensorSpec(np.dtype(np.short), (-1, 2), "b"),
        TensorSpec(np.dtype(np.float32), (2, -1, 2), "c"),
    ])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())
    inp = {
        "a": np.array([[0, 0, 0, 0, 0], [1, 1, 1, 1, 1]], dtype=np.uint64),
        "b": np.array([[0, 0], [1, 1], [2, 2]], dtype=np.short),
        "c": np.array([[[0, 0], [1, 1]], [[2, 2], [3, 3]]], dtype=np.float32),
    }
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.input_types()))

    # Error messages below are matched against the exception (ex.value)
    # rather than the pytest ExceptionInfo wrapper, whose str() is a
    # truncated repr and not the message itself.

    # test that missing column raises
    inp1 = dict(inp)
    # Drop "b" first and pass the remaining dict. The original passed the
    # return value of pop() -- the "b" array itself -- so the dict with a
    # missing key was never exercised.
    inp1.pop("b")
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp1)
    assert "Model is missing inputs" in str(ex.value)

    # test that extra column is ignored
    inp2 = dict(inp)
    inp2["x"] = 1

    # test that extra column is removed
    res = pyfunc_model.predict(inp2)
    # Compare element-wise; ``==`` on dicts of ndarrays only works when the
    # identical array objects are returned.
    assert set(res) == {"a", "b", "c"}
    assert _compare_exact_tensor_dict_input(res, inp)
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test that variable axes are supported
    inp3 = {
        "a": np.array([[0, 0, 0, 0, 0], [1, 1, 1, 1, 1], [2, 2, 2, 2, 2]],
                      dtype=np.uint64),
        "b": np.array([[0, 0], [1, 1]], dtype=np.short),
        "c": np.array([[[0, 0]], [[2, 2]]], dtype=np.float32),
    }
    res = pyfunc_model.predict(inp3)
    assert _compare_exact_tensor_dict_input(res, inp3)
    actual_types = {k: v.dtype for k, v in res.items()}
    assert expected_types == actual_types

    # test that type casting is not supported
    inp4 = dict(inp)
    inp4["a"] = inp4["a"].astype(np.int32)
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp4)
    assert "dtype of input int32 does not match expected dtype uint64" in str(
        ex.value)

    # test wrong shape
    # uint64 is spelled out (np.uint is platform dependent) so that only the
    # shape of "a" disagrees with the schema.
    inp5 = {
        "a": np.array([[0, 0, 0, 0]], dtype=np.uint64),
        "b": np.array([[0, 0], [1, 1]], dtype=np.short),
        "c": np.array([[[0, 0]]], dtype=np.float32),
    }
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp5)
    assert "Shape of input (1, 4) does not match expected shape (-1, 5)" in str(
        ex.value)

    # test non-dictionary input
    inp6 = [
        np.array([[0, 0, 0, 0, 0]], dtype=np.uint64),
        np.array([[0, 0], [1, 1]], dtype=np.short),
        np.array([[[0, 0]]], dtype=np.float32),
    ]
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp6)
    assert "Model is missing inputs ['a', 'b', 'c']." in str(ex.value)

    # test empty ndarray does not work
    inp7 = dict(inp)
    inp7["a"] = np.array([])
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp7)
    assert "Shape of input (0,) does not match expected shape" in str(
        ex.value)

    # test dictionary of str -> list does not work
    inp8 = {k: list(v) for k, v in inp.items()}
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(inp8)
    assert "This model contains a tensor-based model signature with input names" in str(
        ex.value)
    assert (
        "suggests a dictionary input mapping input name to a numpy array, but a dict"
        " with value type <class 'list'> was found") in str(ex.value)

    # test dataframe input fails at shape enforcement
    pdf = pd.DataFrame(
        data=[[1, 2, 3]],
        columns=["a", "b", "c"],
    )
    pdf["a"] = pdf["a"].astype(np.uint64)
    pdf["b"] = pdf["b"].astype(np.short)
    pdf["c"] = pdf["c"].astype(np.float32)
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(pdf)
    assert "Shape of input (1,) does not match expected shape (-1, 5)" in str(
        ex.value)
def test_column_schema_enforcement():
    """Check column-schema enforcement: ordering, extras, dtype conversions.

    The schema has eight named columns (integer, long, float, double,
    boolean, string, binary, datetime). The test verifies that a missing
    column raises, that extra columns are dropped and columns are reordered,
    and then walks through a numbered list of dtype changes and alternative
    input containers, restoring the frame to a valid state between checks.

    NOTE(review): relies on the module-level ``TestModel`` (not visible in
    this block) handing the enforced input back unchanged -- confirm.
    """
    m = Model()
    input_schema = Schema([
        ColSpec("integer", "a"),
        ColSpec("long", "b"),
        ColSpec("float", "c"),
        ColSpec("double", "d"),
        ColSpec("boolean", "e"),
        ColSpec("string", "g"),
        ColSpec("binary", "f"),
        ColSpec("datetime", "h"),
    ])
    m.signature = ModelSignature(inputs=input_schema)
    pyfunc_model = PyFuncModel(model_meta=m, model_impl=TestModel())

    def assert_incompatible(data=None):
        # Predict on ``data`` (default: the current state of ``pdf``) and
        # require a type error. The message is read from the exception
        # itself (ex.value), not from the pytest ExceptionInfo wrapper.
        with pytest.raises(MlflowException) as ex:
            pyfunc_model.predict(pdf if data is None else data)
        assert "Incompatible input types" in str(ex.value)

    # ``object`` replaces the ``np.object`` alias, which newer NumPy releases
    # no longer provide.
    pdf = pd.DataFrame(
        data=[[
            1, 2, 3, 4, True, "x",
            bytes([1]), "2021-01-01 00:00:00.1234567"
        ]],
        columns=["b", "d", "a", "c", "e", "g", "f", "h"],
        dtype=object,
    )
    pdf["a"] = pdf["a"].astype(np.int32)
    pdf["b"] = pdf["b"].astype(np.int64)
    pdf["c"] = pdf["c"].astype(np.float32)
    pdf["d"] = pdf["d"].astype(np.float64)
    # Name the unit explicitly instead of casting to unit-less np.datetime64;
    # nanoseconds is the precision asserted for "h" below.
    pdf["h"] = pdf["h"].astype("datetime64[ns]")

    # test that missing column raises
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(pdf[["b", "d", "a", "e", "g", "f", "h"]])
    assert "Model is missing inputs" in str(ex.value)

    # test that extra column is ignored
    pdf["x"] = 1

    # test that columns are reordered, extra column is ignored
    res = pyfunc_model.predict(pdf)
    assert all((res == pdf[input_schema.input_names()]).all())
    expected_types = dict(
        zip(input_schema.input_names(), input_schema.pandas_types()))
    # MLflow datetime type in input_schema does not encode precision, so add it for assertions
    expected_types["h"] = np.dtype("datetime64[ns]")
    actual_types = res.dtypes.to_dict()
    assert expected_types == actual_types

    # Test conversions
    # 1. long -> integer raises
    pdf["a"] = pdf["a"].astype(np.int64)
    assert_incompatible()
    pdf["a"] = pdf["a"].astype(np.int32)

    # 2. integer -> long works
    pdf["b"] = pdf["b"].astype(np.int32)
    res = pyfunc_model.predict(pdf)
    assert all((res == pdf[input_schema.input_names()]).all())
    assert res.dtypes.to_dict() == expected_types
    pdf["b"] = pdf["b"].astype(np.int64)

    # 3. unsigned int -> long works
    pdf["b"] = pdf["b"].astype(np.uint32)
    res = pyfunc_model.predict(pdf)
    assert all((res == pdf[input_schema.input_names()]).all())
    assert res.dtypes.to_dict() == expected_types
    pdf["b"] = pdf["b"].astype(np.int64)

    # 4. unsigned int -> int raises
    pdf["a"] = pdf["a"].astype(np.uint32)
    assert_incompatible()
    pdf["a"] = pdf["a"].astype(np.int32)

    # 5. double -> float raises
    pdf["c"] = pdf["c"].astype(np.float64)
    assert_incompatible()
    pdf["c"] = pdf["c"].astype(np.float32)

    # 6. float -> double works, double -> float does not
    pdf["d"] = pdf["d"].astype(np.float32)
    res = pyfunc_model.predict(pdf)
    assert res.dtypes.to_dict() == expected_types
    pdf["d"] = pdf["d"].astype(np.float64)
    pdf["c"] = pdf["c"].astype(np.float64)
    assert_incompatible()
    pdf["c"] = pdf["c"].astype(np.float32)

    # 7. int -> float raises
    pdf["c"] = pdf["c"].astype(np.int32)
    assert_incompatible()
    pdf["c"] = pdf["c"].astype(np.float32)

    # 8. int -> double works
    pdf["d"] = pdf["d"].astype(np.int32)
    # Capture the result so the assertions below examine this prediction;
    # the original discarded it and re-checked the previous ``res``.
    res = pyfunc_model.predict(pdf)
    assert all((res == pdf[input_schema.input_names()]).all())
    assert res.dtypes.to_dict() == expected_types

    # 9. long -> double raises
    pdf["d"] = pdf["d"].astype(np.int64)
    assert_incompatible()
    pdf["d"] = pdf["d"].astype(np.float64)

    # 10. any float -> any int raises
    pdf["a"] = pdf["a"].astype(np.float32)
    assert_incompatible()
    pdf["a"] = pdf["a"].astype(np.float64)
    assert_incompatible()
    pdf["a"] = pdf["a"].astype(np.int32)

    pdf["b"] = pdf["b"].astype(np.float64)
    assert_incompatible()
    pdf["b"] = pdf["b"].astype(np.int64)

    # 11. objects work
    pdf["b"] = pdf["b"].astype(object)
    pdf["d"] = pdf["d"].astype(object)
    pdf["e"] = pdf["e"].astype(object)
    pdf["f"] = pdf["f"].astype(object)
    pdf["g"] = pdf["g"].astype(object)
    res = pyfunc_model.predict(pdf)
    assert res.dtypes.to_dict() == expected_types

    # 12. datetime64[D] (date only) -> datetime64[x] works
    pdf["h"] = pdf["h"].astype("datetime64[D]")
    res = pyfunc_model.predict(pdf)
    assert res.dtypes.to_dict() == expected_types
    pdf["h"] = pdf["h"].astype("datetime64[s]")

    # 13. np.ndarrays can be converted to dataframe but have no columns
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(pdf.values)
    assert "Model is missing inputs" in str(ex.value)

    # 14. dictionaries of str -> list/nparray work
    arr = np.array([1, 2, 3])
    d = {
        "a": arr.astype("int32"),
        "b": arr.astype("int64"),
        "c": arr.astype("float32"),
        "d": arr.astype("float64"),
        "e": [True, False, True],
        "g": ["a", "b", "c"],
        "f": [bytes(0), bytes(1), bytes(1)],
        "h": np.array(["2020-01-01", "2020-02-02", "2020-03-03"],
                      dtype=np.datetime64),
    }
    res = pyfunc_model.predict(d)
    assert res.dtypes.to_dict() == expected_types

    # 15. dictionaries of str -> list[list] fail
    d = {
        "a": [arr.astype("int32")],
        "b": [arr.astype("int64")],
        "c": [arr.astype("float32")],
        "d": [arr.astype("float64")],
        "e": [[True, False, True]],
        "g": [["a", "b", "c"]],
        "f": [[bytes(0), bytes(1), bytes(1)]],
        "h": [
            np.array(["2020-01-01", "2020-02-02", "2020-03-03"],
                     dtype=np.datetime64)
        ],
    }
    assert_incompatible(d)

    # 16. conversion to dataframe fails
    d = {
        "a": [1],
        "b": [1, 2],
        "c": [1, 2, 3],
    }
    with pytest.raises(MlflowException) as ex:
        pyfunc_model.predict(d)
    assert "This model contains a column-based signature, which suggests a DataFrame input." in str(
        ex.value)