def test_signature_and_examples_are_saved_correctly(sklearn_knn_model, iris_data):
    """Check signature/example persistence for a loader-module pyfunc model.

    The fixture model is pickled into a scratch directory and saved through
    ``kiwi.pyfunc.save_model`` with this test module acting as loader module.
    This is repeated for every combination of signature (absent / inferred)
    and input example (absent / leading slice of the input data). The saved
    metadata is reloaded with ``Model.load`` and compared with what was given.
    """
    inferred_signature = infer_signature(*iris_data)
    sample_input = iris_data[0][:3, ]
    cases = [
        (sig, ex)
        for sig in (None, inferred_signature)
        for ex in (None, sample_input)
    ]
    for signature, example in cases:
        with TempDir() as workdir:
            with open(workdir.path("skmodel"), "wb") as out:
                pickle.dump(sklearn_knn_model, out)
            model_dir = workdir.path("model")
            kiwi.pyfunc.save_model(
                path=model_dir,
                data_path=workdir.path("skmodel"),
                # Module name of this file, i.e. the basename without ".py".
                loader_module=os.path.basename(__file__)[:-3],
                code_path=[__file__],
                signature=signature,
                input_example=example,
            )
            reloaded = Model.load(model_dir)
            assert signature == reloaded.signature
            if example is not None:
                matches = _read_example(reloaded, model_dir) == example
                assert all(matches.all())
            else:
                assert reloaded.saved_input_example_info is None
def test_signature_and_examples_are_saved_correctly(iris_data, main_scoped_model_class):
    """Check signature/example persistence for a ``python_model`` pyfunc model.

    A model built from ``main_scoped_model_class`` is saved with every
    combination of signature (absent / inferred) and input example (absent /
    leading slice of the input data). The metadata is then reloaded with
    ``Model.load`` and compared with the values that were passed to
    ``save_model``.
    """

    def test_predict(sk_model, model_input):
        # Prediction function handed to the model class under test.
        return sk_model.predict(model_input) * 2

    inferred_signature = infer_signature(*iris_data)
    sample_input = iris_data[0][:3, ]
    cases = [
        (sig, ex)
        for sig in (None, inferred_signature)
        for ex in (None, sample_input)
    ]
    for signature, example in cases:
        with TempDir() as workdir:
            model_dir = workdir.path("model")
            kiwi.pyfunc.save_model(
                path=model_dir,
                artifacts={},
                python_model=main_scoped_model_class(test_predict),
                signature=signature,
                input_example=example,
            )
            reloaded = Model.load(model_dir)
            assert signature == reloaded.signature
            if example is not None:
                matches = _read_example(reloaded, model_dir) == example
                assert all(matches.all())
            else:
                assert reloaded.saved_input_example_info is None
def test_parse_with_schema(pandas_df_with_all_types):
    """Check how ``parse_json_input`` applies a schema to incoming JSON.

    Part one: the fixture frame (its column names double as the column types,
    see ``ColSpec(c, c)``) is shuffled, serialized in "split" and then in
    "records" orientation, and parsed back with the schema each time. The
    signature inferred from the final frame must equal the schema.

    Part two: pins the current coercion behavior for payload values that do
    not fit the column types declared in the schema.
    """
    schema = Schema([ColSpec(c, c) for c in pandas_df_with_all_types.columns])
    df = _shuffle_pdf(pandas_df_with_all_types)
    json_str = json.dumps(df.to_dict(orient="split"), cls=NumpyEncoder)
    df = pyfunc_scoring_server.parse_json_input(json_str, orient="split", schema=schema)
    json_str = json.dumps(df.to_dict(orient="records"), cls=NumpyEncoder)
    df = pyfunc_scoring_server.parse_json_input(json_str, orient="records", schema=schema)
    assert schema == infer_signature(df[schema.column_names()]).inputs

    # The current behavior with pandas json parse with type hints is weird. In some cases, the
    # types are forced ignoring overflow and loss of precision:
    bad_df = (
        '{ "columns":["bad_integer", "bad_float", "bad_string", "bad_boolean"],'
        ' "data":['
        ' [9007199254740991.0, 1.1, 1, 1.5],'
        ' [9007199254740992.0, 9007199254740992.0, 2, 0],'
        ' [9007199254740994.0, 3.3, 3, "some arbitrary string"]'
        ' ] }'
    )
    schema = Schema([
        ColSpec("integer", "bad_integer"),
        ColSpec("float", "bad_float"),
        ColSpec("float", "good_float"),
        ColSpec("string", "bad_string"),
        ColSpec("boolean", "bad_boolean"),
    ])
    df = pyfunc_scoring_server.parse_json_input(bad_df, orient="split", schema=schema)

    # Unfortunately, the current behavior of pandas parse is to force numbers to int32 even if
    # they don't fit:
    assert df["bad_integer"].dtype == np.int32
    assert all(df["bad_integer"] == [-2147483648, -2147483648, -2147483648])

    # The same goes for floats:
    assert df["bad_float"].dtype == np.float32
    assert all(df["bad_float"] == np.array([1.1, 9007199254740992, 3.3], dtype=np.float32))

    # However bad string is recognized as int64:
    # (builtin ``object`` is used because the ``np.object`` alias no longer exists in NumPy.)
    assert all(df["bad_string"] == np.array([1, 2, 3], dtype=object))

    # Boolean is forced - zero and empty string is false, everything else is true:
    # (builtin ``bool`` is what the deprecated ``np.bool`` alias referred to.)
    assert df["bad_boolean"].dtype == bool
    assert all(df["bad_boolean"] == [True, False, True])
def test_signature_and_examples_are_saved_correctly(sequential_model, data):
    """Check signature/example persistence for ``kiwi.pytorch.save_model``.

    The fixture model is saved with every combination of signature (absent /
    inferred from ``data``) and input example (absent / ``data[0].head(3)``).
    The metadata reloaded via ``Model.load`` must match what was passed in.
    """
    inferred_signature = infer_signature(*data)
    sample_input = data[0].head(3)
    cases = [
        (sig, ex)
        for sig in (None, inferred_signature)
        for ex in (None, sample_input)
    ]
    for signature, example in cases:
        with TempDir() as workdir:
            model_dir = workdir.path("model")
            kiwi.pytorch.save_model(
                sequential_model,
                path=model_dir,
                signature=signature,
                input_example=example,
            )
            reloaded = Model.load(model_dir)
            assert signature == reloaded.signature
            if example is not None:
                matches = _read_example(reloaded, model_dir) == example
                assert all(matches.all())
            else:
                assert reloaded.saved_input_example_info is None
def test_signature_and_examples_are_saved_correctly(xgb_model):
    """Check signature/example persistence for ``kiwi.xgboost.save_model``.

    The fixture's model is saved with every combination of signature (absent /
    inferred from the fixture's inference dataframe) and input example
    (absent / ``head(3)`` of that dataframe). The metadata reloaded via
    ``Model.load`` must match what was passed in.
    """
    booster = xgb_model.model
    signature_options = (None, infer_signature(xgb_model.inference_dataframe))
    for signature in signature_options:
        # The example tuple is rebuilt on every outer iteration, as before.
        example_options = (None, xgb_model.inference_dataframe.head(3))
        for example in example_options:
            with TempDir() as workdir:
                model_dir = workdir.path("model")
                kiwi.xgboost.save_model(
                    xgb_model=booster,
                    path=model_dir,
                    signature=signature,
                    input_example=example,
                )
                reloaded = Model.load(model_dir)
                assert signature == reloaded.signature
                if example is not None:
                    matches = _read_example(reloaded, model_dir) == example
                    assert all(matches.all())
                else:
                    assert reloaded.saved_input_example_info is None
def test_model_export_with_signature_and_examples(iris_df, spark_model_iris):
    """Check signature/example persistence for ``sparkm.save_model``.

    The third element of the ``iris_df`` fixture is used as the Spark
    dataframe. The model is saved with every combination of signature
    (absent / inferred from that dataframe) and input example (absent / first
    three rows of its pandas conversion). The metadata reloaded via
    ``Model.load`` must match what was passed in.
    """
    _, _, spark_frame = iris_df
    inferred_signature = infer_signature(spark_frame)
    sample_input = spark_frame.toPandas().head(3)
    cases = [
        (sig, ex)
        for sig in (None, inferred_signature)
        for ex in (None, sample_input)
    ]
    for signature, example in cases:
        with TempDir() as workdir:
            model_dir = workdir.path("model")
            sparkm.save_model(
                spark_model_iris.model,
                path=model_dir,
                signature=signature,
                input_example=example,
            )
            reloaded = Model.load(model_dir)
            assert signature == reloaded.signature
            if example is not None:
                matches = _read_example(reloaded, model_dir) == example
                assert all(matches.all())
            else:
                assert reloaded.saved_input_example_info is None
def test_signature_and_examples_are_saved_correctly(onnx_model, data, onnx_custom_env):
    """Check signature/example persistence for ``kiwi.onnx.save_model``.

    The fixture model is saved (with the custom conda environment fixture)
    for every combination of signature (absent / inferred from ``data``) and
    input example (absent / ``data[0].head(3)``). The metadata reloaded via
    ``Model.load`` must match what was passed in.
    """
    # Imported inside the test, as in the original, rather than at module level.
    import kiwi.onnx

    inferred_signature = infer_signature(*data)
    sample_input = data[0].head(3)
    cases = [
        (sig, ex)
        for sig in (None, inferred_signature)
        for ex in (None, sample_input)
    ]
    for signature, example in cases:
        with TempDir() as workdir:
            model_dir = workdir.path("model")
            kiwi.onnx.save_model(
                onnx_model,
                path=model_dir,
                conda_env=onnx_custom_env,
                signature=signature,
                input_example=example,
            )
            reloaded = Model.load(model_dir)
            assert signature == reloaded.signature
            if example is not None:
                matches = _read_example(reloaded, model_dir) == example
                assert all(matches.all())
            else:
                assert reloaded.saved_input_example_info is None
def test_signature_and_examples_are_saved_correctly(h2o_iris_model):
    """Check signature/example persistence for ``kiwi.h2o.save_model``.

    The fixture's model is saved with every combination of signature (absent /
    inferred from the fixture's inference data converted with
    ``as_data_frame()``) and input example (absent / ``head(3)`` of that
    conversion). The metadata reloaded via ``Model.load`` must match what was
    passed in.
    """
    h2o_model = h2o_iris_model.model
    inferred_signature = infer_signature(h2o_iris_model.inference_data.as_data_frame())
    sample_input = h2o_iris_model.inference_data.as_data_frame().head(3)
    cases = [
        (sig, ex)
        for sig in (None, inferred_signature)
        for ex in (None, sample_input)
    ]
    for signature, example in cases:
        with TempDir() as workdir:
            model_dir = workdir.path("model")
            kiwi.h2o.save_model(
                h2o_model,
                path=model_dir,
                signature=signature,
                input_example=example,
            )
            reloaded = Model.load(model_dir)
            assert signature == reloaded.signature
            if example is not None:
                matches = _read_example(reloaded, model_dir) == example
                assert all(matches.all())
            else:
                assert reloaded.saved_input_example_info is None
def test_signature_and_examples_are_saved_correctly(gluon_model, model_data):
    """Check signature/example persistence for ``kiwi.gluon.save_model``.

    The fixture model is saved with every combination of signature (absent /
    inferred from ``model_data[0].asnumpy()``) and input example (absent /
    leading slice of that array). The metadata reloaded via ``Model.load``
    must match what was passed in.
    """
    inferred_signature = infer_signature(model_data[0].asnumpy())
    sample_input = model_data[0].asnumpy()[:3, ]
    cases = [
        (sig, ex)
        for sig in (None, inferred_signature)
        for ex in (None, sample_input)
    ]
    for signature, example in cases:
        with TempDir() as workdir:
            model_dir = workdir.path("model")
            kiwi.gluon.save_model(
                gluon_model,
                path=model_dir,
                signature=signature,
                input_example=example,
            )
            reloaded = Model.load(model_dir)
            assert signature == reloaded.signature
            if example is not None:
                matches = _read_example(reloaded, model_dir) == example
                assert all(matches.all())
            else:
                assert reloaded.saved_input_example_info is None
def test_model_export_with_schema_and_examples(spacy_model_with_data):
    """Check signature/example persistence for ``kiwi.spacy.save_model``.

    The fixture's model is saved with every combination of signature (absent /
    inferred from the fixture's inference data) and input example (absent /
    ``head(3)`` of that data). The metadata reloaded via ``Model.load`` must
    match what was passed to ``save_model``.
    """
    spacy_model = spacy_model_with_data.model
    signature_ = infer_signature(spacy_model_with_data.inference_data)
    example_ = spacy_model_with_data.inference_data.head(3)
    for signature in (None, signature_):
        for example in (None, example_):
            # A leftover debugging print of the current combination was removed here.
            with TempDir() as tmp:
                path = tmp.path("model")
                kiwi.spacy.save_model(
                    spacy_model,
                    path=path,
                    signature=signature,
                    input_example=example,
                )
                mlflow_model = Model.load(path)
                assert signature == mlflow_model.signature
                if example is None:
                    assert mlflow_model.saved_input_example_info is None
                else:
                    assert all((_read_example(mlflow_model, path) == example).all())
def test_signature_and_examples_are_saved_correctly(sklearn_knn_model):
    """Check signature/example persistence for ``kiwi.sklearn.save_model``.

    The fixture's model is saved with every combination of signature (absent /
    inferred from the fixture's inference data) and input example (absent /
    leading slice of that data). The metadata reloaded via ``Model.load`` must
    match what was passed in.
    """
    inference_data = sklearn_knn_model.inference_data
    knn_model = sklearn_knn_model.model
    inferred_signature = infer_signature(inference_data)
    sample_input = inference_data[:3, ]
    cases = [
        (sig, ex)
        for sig in (None, inferred_signature)
        for ex in (None, sample_input)
    ]
    for signature, example in cases:
        with TempDir() as workdir:
            model_dir = workdir.path("model")
            kiwi.sklearn.save_model(
                knn_model,
                path=model_dir,
                signature=signature,
                input_example=example,
            )
            reloaded = Model.load(model_dir)
            assert signature == reloaded.signature
            if example is not None:
                matches = _read_example(reloaded, model_dir) == example
                assert all(matches.all())
            else:
                assert reloaded.saved_input_example_info is None
def test_schema_and_examples_are_save_correctly(saved_tf_iris_model, model_path):
    """Check signature/example persistence for ``kiwi.tensorflow.save_model``.

    Training data from ``iris_data_utils.load_data()`` is wrapped in pandas
    objects. The saved-model fixture is then exported with every combination
    of signature (absent / inferred from features and labels) and input
    example (absent / first three feature rows). The metadata reloaded via
    ``Model.load`` must match what was passed in.
    """
    (train_x, train_y), _ = iris_data_utils.load_data()
    features = pd.DataFrame(train_x)
    labels = pd.Series(train_y)
    signature_options = (None, infer_signature(features, labels))
    for signature in signature_options:
        # The example tuple is rebuilt on every outer iteration, as before.
        example_options = (None, features.head(3))
        for example in example_options:
            with TempDir() as workdir:
                model_dir = workdir.path("model")
                kiwi.tensorflow.save_model(
                    tf_saved_model_dir=saved_tf_iris_model.path,
                    tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
                    tf_signature_def_key=saved_tf_iris_model.signature_def_key,
                    path=model_dir,
                    signature=signature,
                    input_example=example,
                )
                reloaded = Model.load(model_dir)
                assert signature == reloaded.signature
                if example is not None:
                    matches = _read_example(reloaded, model_dir) == example
                    assert all(matches.all())
                else:
                    assert reloaded.saved_input_example_info is None