def test_container_scoring_with_sparkml_and_mleap_outputs_same_format(
        spark_model_iris, model_path, spark_conda_env):
    """
    Score the same Spark model in the SageMaker container twice -- once saved without a
    sample input (no MLeap flavor) and once saved with one (MLeap flavor present) -- and
    check that both responses are lists of equal length whose entries are integers.
    """
    sparkml_model = Model()
    sparkm.save_model(spark_model_iris.model, path=model_path,
                      conda_env=spark_conda_env, mlflow_model=sparkml_model)
    assert sparkm.FLAVOR_NAME in sparkml_model.flavors
    assert mleap.FLAVOR_NAME not in sparkml_model.flavors

    sparkml_preds = score_model_in_sagemaker_docker_container(
        model_path=model_path, data=spark_model_iris.inference_df)

    # Start the second save from an empty directory at the same location.
    shutil.rmtree(model_path)
    assert not os.path.exists(model_path)
    os.makedirs(model_path)
    assert os.path.exists(model_path)

    mleap_model = Model()
    sparkm.save_model(spark_model_iris.model, path=model_path,
                      sample_input=spark_model_iris.training_df, mlflow_model=mleap_model)
    assert mleap.FLAVOR_NAME in mleap_model.flavors

    mleap_preds = score_model_in_sagemaker_docker_container(
        model_path=model_path, data=spark_model_iris.inference_df)

    assert isinstance(sparkml_preds, list)
    assert isinstance(mleap_preds, list)
    assert len(mleap_preds) == len(sparkml_preds)

    # The previous form, ``assert [isinstance(...) for ...]``, only asserted that the list
    # was non-empty: a non-empty list is truthy regardless of its contents. Keep the
    # non-empty requirement and actually verify every entry.
    # NOTE(review): if the container returns predictions as floats, widen the accepted
    # type here rather than reverting to the list-truthiness form.
    assert sparkml_preds and all(isinstance(entry, int) for entry in sparkml_preds)
    assert mleap_preds and all(isinstance(entry, int) for entry in mleap_preds)
def test_model_deployment(spark_model_iris, model_path, spark_custom_env):
    """
    Save the Spark model with a sample input, then score it in the SageMaker container
    through the pyfunc flavor and through the mleap flavor, comparing each response with
    the fixture's predictions to four decimal places.
    """
    sparkm.save_model(
        spark_model_iris.model,
        path=model_path,
        conda_env=spark_custom_env,
        # Test both spark ml and mleap
        sample_input=spark_model_iris.spark_df,
    )

    # 1. score and compare pyfunc deployed in Sagemaker docker container
    pyfunc_response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=spark_model_iris.pandas_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
    )
    np.testing.assert_array_almost_equal(
        spark_model_iris.predictions,
        np.array(json.loads(pyfunc_response.content)),
        decimal=4,
    )

    # 2. score and compare mleap deployed in Sagemaker docker container
    mleap_response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=spark_model_iris.pandas_df.to_json(orient="split"),
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
        flavor=mlflow.mleap.FLAVOR_NAME,
    )
    np.testing.assert_array_almost_equal(
        spark_model_iris.predictions,
        np.array(json.loads(mleap_response.content)),
        decimal=4,
    )
def test_model_deployment(spark_model_iris, model_path, spark_conda_env):
    """
    Save the Spark model with a sample input and check that scoring it in the SageMaker
    container via the pyfunc flavor and via the mleap flavor both reproduce the fixture's
    predictions exactly.
    """
    sparkm.save_model(
        spark_model_iris.model,
        path=model_path,
        conda_env=spark_conda_env,
        # Test both spark ml and mleap
        sample_input=spark_model_iris.spark_df,
    )

    # 1. score and compare pyfunc deployed in Sagemaker docker container
    pyfunc_preds = score_model_in_sagemaker_docker_container(
        model_path=model_path,
        data=spark_model_iris.pandas_df,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
    )
    assert spark_model_iris.predictions == pyfunc_preds

    # 2. score and compare mleap deployed in Sagemaker docker container
    mleap_preds = score_model_in_sagemaker_docker_container(
        model_path=model_path,
        data=spark_model_iris.pandas_df,
        flavor=mlflow.mleap.FLAVOR_NAME,
    )
    assert spark_model_iris.predictions == mleap_preds
def test_model_export(self):
    """
    Pickle the linear model into a temp dir, wrap it as a pyfunc model that is loaded via
    ``mlflow.sklearn``, score it in the SageMaker container and compare the decoded
    response with the stored expected predictions. The temp dir is removed afterwards on
    a best-effort basis.
    """
    cleanup_dir = None
    try:
        with TempDir(chdr=True, remove_on_exit=False) as tmp:
            cleanup_dir = tmp._path
            # NOTE: Changed dir to temp dir and use relative paths to get around the way
            # temp dirs are handled in python.
            pickle_path = tmp.path("model.pkl")
            with open(pickle_path, "wb") as pickle_file:
                pickle.dump(self._linear_lr, pickle_file)

            input_path = tmp.path("input_model")
            conda_env = "conda.env"
            _mlflow_conda_env(path=tmp.path(conda_env))
            pyfunc.save_model(
                input_path,
                loader_module="mlflow.sklearn",
                data_path=pickle_path,
                conda_env=conda_env,
            )

            response = score_model_in_sagemaker_docker_container(
                model_path=input_path,
                data=self._iris_df,
                content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
                flavor=pyfunc.FLAVOR_NAME,
            )
            np.testing.assert_array_equal(
                self._linear_lr_predict,
                np.array(json.loads(response.content)),
            )
    finally:
        if cleanup_dir:
            try:
                import shutil
                shutil.rmtree(cleanup_dir)
            except OSError:
                # Cleanup is best-effort; report the leftover directory and move on.
                print("Failed to remove", cleanup_dir)
def test_model_export(self):
    """
    Pickle the linear model into a temp dir, save it as a pyfunc model loaded via
    ``mlflow.sklearn``, score it in the SageMaker container and compare the returned
    predictions with the stored expected predictions. The temp dir is removed afterwards
    on a best-effort basis.
    """
    path_to_remove = None
    try:
        with TempDir(chdr=True, remove_on_exit=False) as tmp:
            path_to_remove = tmp._path
            # NOTE: Changed dir to temp dir and use relative paths to get around the way
            # temp dirs are handled in python.
            model_pkl = tmp.path("model.pkl")
            with open(model_pkl, "wb") as f:
                pickle.dump(self._linear_lr, f)
            input_path = tmp.path("input_model")
            conda_env = "conda.env"
            pyfunc.save_model(input_path,
                              loader_module="mlflow.sklearn",
                              data_path=model_pkl,
                              conda_env=_mlflow_conda_env(tmp.path(conda_env)))
            xpred = score_model_in_sagemaker_docker_container(input_path, self._iris_df)
            print('expected', self._linear_lr_predict)
            print('actual ', xpred)
            np.testing.assert_array_equal(self._linear_lr_predict, xpred)
    finally:
        if path_to_remove:
            try:
                import shutil
                shutil.rmtree(path_to_remove)
            except OSError:
                # Catch every filesystem error, not only PermissionError: an exception
                # escaping this ``finally`` block would replace the test's own result
                # (pass or assertion failure) with an unrelated cleanup error.
                print("Failed to remove", path_to_remove)
def test_sagemaker_docker_model_scoring_with_default_conda_env(
        sklearn_logreg_model, main_scoped_model_class, iris_data, tmpdir):
    """
    Save a pyfunc model (no conda env passed) that wraps a saved sklearn model, score it
    in the SageMaker container and check the response against predictions from the same
    model reloaded locally.
    """
    base_dir = str(tmpdir)

    sklearn_model_path = os.path.join(base_dir, "sklearn_model")
    mlflow.sklearn.save_model(sk_model=sklearn_logreg_model, path=sklearn_model_path)

    def test_predict(sk_model, model_input):
        return sk_model.predict(model_input) * 2

    pyfunc_model_path = os.path.join(base_dir, "pyfunc_model")
    mlflow.pyfunc.save_model(
        path=pyfunc_model_path,
        artifacts={"sk_model": sklearn_model_path},
        python_model=main_scoped_model_class(test_predict),
    )
    local_model = mlflow.pyfunc.load_pyfunc(model_uri=pyfunc_model_path)

    inference_df = pd.DataFrame(iris_data[0])
    response = score_model_in_sagemaker_docker_container(
        model_uri=pyfunc_model_path,
        data=inference_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
    )

    container_preds = pd.DataFrame(json.loads(response.content))
    local_preds = pd.DataFrame(local_model.predict(inference_df))
    pandas.testing.assert_frame_equal(
        container_preds,
        local_preds,
        check_dtype=False,
        check_less_precise=6,
    )
def test_container_scoring_responds_to_bad_inputs_using_error_message_with_mleap_flavor(
        spark_model_iris, model_path):
    """
    Save the Spark model with a sample input (so the MLeap flavor is present), send the
    string "invalid" to the SageMaker container and check that the response carries an
    "Error" entry, which is printed.
    """
    saved_model = Model()
    sparkm.save_model(
        spark_model_iris.model,
        path=model_path,
        sample_input=spark_model_iris.training_df,
        mlflow_model=saved_model,
    )
    assert mleap.FLAVOR_NAME in saved_model.flavors

    response = score_model_in_sagemaker_docker_container(
        model_path=model_path,
        data="invalid",
    )
    assert "Error" in response.keys()
    print(response["Error"])
def test_sagemaker_docker_model_scoring_with_default_conda_env(model, model_path, data, predicted):
    """
    Save the Keras model with ``conda_env=None``, score ``data[0]`` in the SageMaker
    container through the pyfunc flavor and compare the response values with
    ``predicted`` to four decimal places.
    """
    mlflow.keras.save_model(keras_model=model, path=model_path, conda_env=None)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=data[0],
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
        activity_polling_timeout_seconds=500,
    )

    container_preds = pd.DataFrame(json.loads(response.content))
    np.testing.assert_array_almost_equal(container_preds.values, predicted, decimal=4)
def test_input_with_mleap_flavor(spark_model_iris, model_path):
    """
    Check that scoring the MLeap-flavored model in the Docker container raises an
    exception when it is given bad input.
    """
    saved_model = Model()
    sparkm.save_model(
        spark_model_iris.model,
        path=model_path,
        sample_input=spark_model_iris.training_df,
        mlflow_model=saved_model,
    )
    assert mleap.FLAVOR_NAME in saved_model.flavors

    # The return value is irrelevant: the call itself is expected to raise.
    with pytest.raises(Exception):
        score_model_in_sagemaker_docker_container(model_path=model_path, data="invalid")
def test_model_deployment(spark_model_iris, model_path, spark_custom_env):
    """
    Save the Spark model with the custom conda env, score it in the SageMaker container
    through the pyfunc flavor and compare the response with the fixture's predictions to
    four decimal places.
    """
    sparkm.save_model(spark_model_iris.model, path=model_path, conda_env=spark_custom_env)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=spark_model_iris.pandas_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
    )

    np.testing.assert_array_almost_equal(
        spark_model_iris.predictions,
        np.array(json.loads(response.content)),
        decimal=4,
    )
def test_sagemaker_docker_model_scoring_with_default_conda_env(spark_model_iris, model_path):
    """
    Save the Spark model with ``conda_env=None``, score it in the SageMaker container
    through the pyfunc flavor and compare the response with the fixture's predictions to
    four decimal places.
    """
    sparkm.save_model(spark_model_iris.model, path=model_path, conda_env=None)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=spark_model_iris.pandas_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
    )

    container_preds = np.array(json.loads(response.content))
    np.testing.assert_array_almost_equal(
        container_preds,
        spark_model_iris.predictions,
        decimal=4,
    )
def test_sagemaker_docker_model_scoring_with_default_conda_env(lgb_model, model_path):
    """
    Save the LightGBM model with ``conda_env=None``, score it in the SageMaker container
    through the pyfunc flavor and check the response against predictions from the same
    model reloaded locally as a pyfunc.
    """
    mlflow.lightgbm.save_model(lgb_model=lgb_model.model, path=model_path, conda_env=None)
    local_model = pyfunc.load_pyfunc(model_uri=model_path)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=lgb_model.inference_dataframe,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
    )

    container_preds = pd.DataFrame(json.loads(response.content))
    local_preds = pd.DataFrame(local_model.predict(lgb_model.inference_dataframe))
    pandas.testing.assert_frame_equal(
        container_preds,
        local_preds,
        check_dtype=False,
        check_less_precise=6,
    )
def test_model_deployment(spark_model_iris, model_path, spark_custom_env):
    """
    Save the Spark model with a sample input, score it in the SageMaker container through
    the mleap flavor using split-oriented JSON text as the payload, and compare the
    response with the fixture's predictions to four decimal places.
    """
    mlflow.spark.save_model(
        spark_model_iris.model,
        path=model_path,
        conda_env=spark_custom_env,
        sample_input=spark_model_iris.spark_df,
    )

    payload = spark_model_iris.pandas_df.to_json(orient="split")
    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=payload,
        content_type=mlflow.pyfunc.scoring_server.CONTENT_TYPE_JSON,
        flavor=mlflow.mleap.FLAVOR_NAME,
    )

    np.testing.assert_array_almost_equal(
        spark_model_iris.predictions,
        np.array(json.loads(response.content)),
        decimal=4,
    )
def test_sagemaker_docker_model_scoring_with_default_conda_env(h2o_iris_model, model_path):
    """
    Save the H2O model with ``conda_env=None``, score it in the SageMaker container
    through the pyfunc flavor and compare the "predict" column of the response with the
    "predict" column produced by the same model reloaded locally as a pyfunc.
    """
    mlflow.h2o.save_model(h2o_model=h2o_iris_model.model, path=model_path, conda_env=None)
    local_model = mlflow.pyfunc.load_pyfunc(model_path)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=h2o_iris_model.inference_data.as_data_frame(),
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
    )

    container_preds = pd.DataFrame(json.loads(response.content))
    container_predict_col = container_preds["predict"].to_frame()
    local_predict_col = local_model.predict(
        h2o_iris_model.inference_data.as_data_frame())["predict"].to_frame()
    pandas.testing.assert_frame_equal(
        container_predict_col,
        local_predict_col,
        check_dtype=False,
        check_less_precise=6,
    )
def test_model_deployment_with_default_conda_env(saved_tf_iris_model, model_path):
    """
    Save the TensorFlow SavedModel with ``conda_env=None``, score it in the SageMaker
    container through the pyfunc flavor and compare the response frame with the fixture's
    expected results frame.
    """
    mlflow.tensorflow.save_model(
        tf_saved_model_dir=saved_tf_iris_model.path,
        tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
        tf_signature_def_key=saved_tf_iris_model.signature_def_key,
        path=model_path,
        conda_env=None,
    )

    response = score_model_in_sagemaker_docker_container(
        model_path=model_path,
        data=saved_tf_iris_model.inference_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
    )

    container_preds = pd.DataFrame(json.loads(response.content))
    pandas.testing.assert_frame_equal(
        container_preds,
        saved_tf_iris_model.expected_results_df,
        check_dtype=False,
        check_less_precise=6,
    )
def test_model_export(tmpdir):
    """
    Train a logistic-regression pipeline on the iris data with Spark, save it, and check
    that predictions from the reloaded Spark model, the pyfunc wrapper and the SageMaker
    container all equal those of the original in-memory model. Finally check that the
    DFS temp directory exists and is empty.
    """
    conda_env = os.path.join(str(tmpdir), "conda_env.yml")
    _mlflow_conda_env(
        conda_env, additional_pip_deps=["pyspark=={}".format(pyspark_version)])

    iris = datasets.load_iris()
    X = iris.data  # all features are used; the assembler below consumes every column
    y = iris.target
    pandas_df = pd.DataFrame(X, columns=iris.feature_names)
    pandas_df['label'] = pd.Series(y)

    # The key must be a real Spark property name. The former
    # "spark_session.python.worker.reuse" is not one, so the setting never took effect.
    spark_session = pyspark.sql.SparkSession.builder \
        .config(key="spark.python.worker.reuse", value=True) \
        .master("local-cluster[2, 1, 1024]") \
        .getOrCreate()
    spark_df = spark_session.createDataFrame(pandas_df)
    model_path = tmpdir.mkdir("model")

    assembler = VectorAssembler(inputCols=iris.feature_names, outputCol="features")
    lr = LogisticRegression(maxIter=50, regParam=0.1, elasticNetParam=0.8)
    pipeline = Pipeline(stages=[assembler, lr])
    # Fit the model
    model = pipeline.fit(spark_df)

    # Reference predictions from the in-memory model
    preds_df = model.transform(spark_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]

    sparkm.save_model(model, path=str(model_path), conda_env=conda_env)

    # 1. reloaded native Spark model
    reloaded_model = sparkm.load_model(path=str(model_path))
    preds_df_1 = reloaded_model.transform(spark_df)
    preds1_1 = [x.prediction for x in preds_df_1.select("prediction").collect()]
    assert preds1 == preds1_1

    # 2. pyfunc wrapper
    m = pyfunc.load_pyfunc(str(model_path))
    preds2 = m.predict(pandas_df)
    assert preds1 == preds2

    # 3. SageMaker docker container
    preds3 = score_model_in_sagemaker_docker_container(
        model_path=str(model_path), data=pandas_df)
    assert preds1 == preds3

    # The DFS temp dir must still exist and contain no leftovers.
    assert os.path.exists(sparkm.DFS_TMP)
    print(os.listdir(sparkm.DFS_TMP))
    assert not os.listdir(sparkm.DFS_TMP)
def test_model_export(spark_model_iris, model_path, spark_conda_env):
    """
    Save the Spark model and check that predictions from the reloaded Spark model, the
    pyfunc wrapper and the SageMaker container all equal those of the original in-memory
    model. Finally check that the DFS temp directory exists and is not empty.
    """
    def collect_predictions(fitted_model):
        # Transform the training frame and pull the "prediction" column into a list.
        transformed = fitted_model.transform(spark_model_iris.training_df)
        return [row.prediction for row in transformed.select("prediction").collect()]

    expected_preds = collect_predictions(spark_model_iris.model)

    sparkm.save_model(spark_model_iris.model, path=model_path, conda_env=spark_conda_env)

    # 1. reloaded native Spark model
    reloaded_model = sparkm.load_model(path=model_path)
    reloaded_preds = collect_predictions(reloaded_model)
    assert expected_preds == reloaded_preds

    # 2. pyfunc wrapper
    pyfunc_model = pyfunc.load_pyfunc(model_path)
    pyfunc_preds = pyfunc_model.predict(spark_model_iris.inference_df)
    assert expected_preds == pyfunc_preds

    # 3. SageMaker docker container
    container_preds = score_model_in_sagemaker_docker_container(
        model_path=model_path,
        data=spark_model_iris.inference_df,
    )
    assert expected_preds == container_preds

    assert os.path.exists(sparkm.DFS_TMP)
    print(os.listdir(sparkm.DFS_TMP))
    # We expect not to delete the DFS tempdir.
    assert os.listdir(sparkm.DFS_TMP)