Ejemplo n.º 1
0
def test_container_scoring_with_sparkml_and_mleap_outputs_same_format(
        spark_model_iris, model_path, spark_conda_env):
    """Check that container scoring returns the same response format for both saves.

    The model is saved once without a sample input (asserting that only the
    Spark flavor is recorded) and scored, then the model directory is wiped,
    the model is saved again with a sample input (asserting that the MLeap
    flavor is recorded) and scored a second time. Both responses must be
    non-empty lists of equal length whose entries are all ``int``.
    """
    sparkml_model = Model()
    sparkm.save_model(spark_model_iris.model,
                      path=model_path,
                      conda_env=spark_conda_env,
                      mlflow_model=sparkml_model)
    assert sparkm.FLAVOR_NAME in sparkml_model.flavors
    assert mleap.FLAVOR_NAME not in sparkml_model.flavors
    sparkml_preds = score_model_in_sagemaker_docker_container(
        model_path=model_path, data=spark_model_iris.inference_df)

    # Reset the model directory so the second save starts from an empty path.
    shutil.rmtree(model_path)
    assert not os.path.exists(model_path)
    os.makedirs(model_path)
    assert os.path.exists(model_path)

    mleap_model = Model()
    sparkm.save_model(spark_model_iris.model,
                      path=model_path,
                      sample_input=spark_model_iris.training_df,
                      mlflow_model=mleap_model)
    assert mleap.FLAVOR_NAME in mleap_model.flavors
    mleap_preds = score_model_in_sagemaker_docker_container(
        model_path=model_path, data=spark_model_iris.inference_df)

    assert isinstance(sparkml_preds, list)
    assert isinstance(mleap_preds, list)
    assert len(mleap_preds) == len(sparkml_preds)
    # The previous form, ``assert [isinstance(...) for ...]``, only asserted that
    # the list was non-empty. Keep that non-empty requirement explicitly and
    # actually verify the type of every entry.
    # NOTE(review): if the container legitimately returns floats, this stricter
    # check will now fail and the expected type should be revisited.
    assert sparkml_preds
    assert mleap_preds
    assert all(isinstance(entry, int) for entry in sparkml_preds)
    assert all(isinstance(entry, int) for entry in mleap_preds)
Ejemplo n.º 2
0
def test_model_deployment(spark_model_iris, model_path, spark_custom_env):
    """Score one saved Spark model in the container through two flavors.

    The model is saved with a sample input, then scored once with the pyfunc
    flavor and once with the mleap flavor. Each JSON response body is compared
    with ``spark_model_iris.predictions`` to four decimal places.
    """
    sparkm.save_model(spark_model_iris.model,
                      path=model_path,
                      conda_env=spark_custom_env,
                      # A sample input is supplied to test both spark ml and mleap
                      sample_input=spark_model_iris.spark_df)

    # Step 1: pyfunc flavor, scored with the split-oriented JSON content type.
    pyfunc_response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=spark_model_iris.pandas_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME)
    np.testing.assert_array_almost_equal(
        spark_model_iris.predictions,
        np.array(json.loads(pyfunc_response.content)),
        decimal=4)

    # Step 2: mleap flavor, scored with a pre-serialized split-oriented JSON string.
    mleap_response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=spark_model_iris.pandas_df.to_json(orient="split"),
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
        flavor=mlflow.mleap.FLAVOR_NAME)
    np.testing.assert_array_almost_equal(
        spark_model_iris.predictions,
        np.array(json.loads(mleap_response.content)),
        decimal=4)
def test_model_deployment(spark_model_iris, model_path, spark_conda_env):
    """Score a saved Spark model in the container with the pyfunc and mleap flavors.

    Each scoring result is compared for equality with
    ``spark_model_iris.predictions``.
    """
    sparkm.save_model(
        spark_model_iris.model,
        path=model_path,
        conda_env=spark_conda_env,
        # A sample input is supplied to test both spark ml and mleap
        sample_input=spark_model_iris.spark_df)

    # Step 1: pyfunc flavor deployed in the Sagemaker docker container.
    pyfunc_preds = score_model_in_sagemaker_docker_container(
        model_path=model_path,
        data=spark_model_iris.pandas_df,
        flavor=mlflow.pyfunc.FLAVOR_NAME)
    assert spark_model_iris.predictions == pyfunc_preds

    # Step 2: mleap flavor deployed in the Sagemaker docker container.
    mleap_preds = score_model_in_sagemaker_docker_container(
        model_path=model_path,
        data=spark_model_iris.pandas_df,
        flavor=mlflow.mleap.FLAVOR_NAME)
    assert spark_model_iris.predictions == mleap_preds
Ejemplo n.º 4
0
 def test_model_export(self):
     """Export a pickled model as a pyfunc model and score it in the container.

     ``self._linear_lr`` is pickled into a temp dir, saved as a pyfunc model
     with ``loader_module="mlflow.sklearn"``, and scored against
     ``self._iris_df``. The parsed response must equal
     ``self._linear_lr_predict``. The temp dir is created with
     ``remove_on_exit=False`` and removed on a best-effort basis afterwards.
     """
     cleanup_dir = None
     try:
         with TempDir(chdr=True, remove_on_exit=False) as tmp:
             cleanup_dir = tmp._path
             # NOTE: The working dir is changed to the temp dir and relative paths are
             # used to get around the way temp dirs are handled in python.
             pickle_path = tmp.path("model.pkl")
             with open(pickle_path, "wb") as handle:
                 pickle.dump(self._linear_lr, handle)
             model_dir = tmp.path("input_model")
             env_file = "conda.env"
             _mlflow_conda_env(path=tmp.path(env_file))
             pyfunc.save_model(
                 model_dir,
                 loader_module="mlflow.sklearn",
                 data_path=pickle_path,
                 conda_env=env_file)
             response = score_model_in_sagemaker_docker_container(
                 model_path=model_dir,
                 data=self._iris_df,
                 content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
                 flavor=pyfunc.FLAVOR_NAME)
             np.testing.assert_array_equal(
                 self._linear_lr_predict,
                 np.array(json.loads(response.content)))
     finally:
         # Best-effort cleanup: a failed removal is reported, not raised.
         if cleanup_dir:
             try:
                 import shutil
                 shutil.rmtree(cleanup_dir)
             except OSError:
                 print("Failed to remove", cleanup_dir)
Ejemplo n.º 5
0
 def test_model_export(self):
     """Export a pickled model as a pyfunc model and score it in the container.

     ``self._linear_lr`` is pickled into a temp dir and saved as a pyfunc model
     with ``loader_module="mlflow.sklearn"``, using the return value of
     ``_mlflow_conda_env`` as the conda environment. The container's
     predictions for ``self._iris_df`` must equal ``self._linear_lr_predict``.
     The temp dir is created with ``remove_on_exit=False`` and removed on a
     best-effort basis afterwards.
     """
     path_to_remove = None
     try:
         with TempDir(chdr=True, remove_on_exit=False) as tmp:
             path_to_remove = tmp._path
             # NOTE: Changed dir to temp dir and use relative paths to get around the way temp
             # dirs are handled in python.
             model_pkl = tmp.path("model.pkl")
             with open(model_pkl, "wb") as f:
                 pickle.dump(self._linear_lr, f)
             input_path = tmp.path("input_model")
             conda_env = "conda.env"
             pyfunc.save_model(input_path,
                               loader_module="mlflow.sklearn",
                               data_path=model_pkl,
                               conda_env=_mlflow_conda_env(
                                   tmp.path(conda_env)))
             xpred = score_model_in_sagemaker_docker_container(
                 input_path, self._iris_df)
             # Printed so both arrays are visible in the captured test output.
             print('expected', self._linear_lr_predict)
             print('actual  ', xpred)
             np.testing.assert_array_equal(self._linear_lr_predict, xpred)
     finally:
         if path_to_remove:
             try:
                 import shutil
                 shutil.rmtree(path_to_remove)
             # Catch every OSError, not just PermissionError: any other removal
             # failure raised from this ``finally`` would replace the test's
             # own result with a cleanup error.
             except OSError:
                 print("Failed to remove", path_to_remove)
def test_sagemaker_docker_model_scoring_with_default_conda_env(
        sklearn_logreg_model, main_scoped_model_class, iris_data, tmpdir):
    """Compare container predictions of a custom pyfunc model with local ones.

    A sklearn model is saved and wrapped as an artifact of a pyfunc model
    built from ``main_scoped_model_class``; no ``conda_env`` is passed to the
    pyfunc save. The container's response for ``iris_data[0]`` must match the
    predictions of the locally reloaded pyfunc model.
    """
    base_dir = str(tmpdir)
    sklearn_model_path = os.path.join(base_dir, "sklearn_model")
    mlflow.sklearn.save_model(
        sk_model=sklearn_logreg_model, path=sklearn_model_path)

    def test_predict(sk_model, model_input):
        # Prediction function handed to the pyfunc wrapper: doubles the sklearn output.
        return sk_model.predict(model_input) * 2

    pyfunc_model_path = os.path.join(base_dir, "pyfunc_model")
    mlflow.pyfunc.save_model(path=pyfunc_model_path,
                             artifacts={"sk_model": sklearn_model_path},
                             python_model=main_scoped_model_class(test_predict))
    local_model = mlflow.pyfunc.load_pyfunc(model_uri=pyfunc_model_path)

    inference_df = pd.DataFrame(iris_data[0])
    response = score_model_in_sagemaker_docker_container(
        model_uri=pyfunc_model_path,
        data=inference_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME)
    container_preds = pd.DataFrame(json.loads(response.content))
    local_preds = pd.DataFrame(local_model.predict(inference_df))

    pandas.testing.assert_frame_equal(
        container_preds, local_preds, check_dtype=False, check_less_precise=6)
Ejemplo n.º 7
0
def test_container_scoring_responds_to_bad_inputs_using_error_message_with_mleap_flavor(
        spark_model_iris, model_path):
    """Check that scoring invalid data yields a response carrying an "Error" key.

    The model is saved with a sample input (asserting the MLeap flavor is
    recorded), then scored with the string ``"invalid"``.
    """
    saved_model = Model()
    sparkm.save_model(
        spark_model_iris.model,
        path=model_path,
        sample_input=spark_model_iris.training_df,
        mlflow_model=saved_model)
    assert mleap.FLAVOR_NAME in saved_model.flavors

    response = score_model_in_sagemaker_docker_container(
        model_path=model_path, data="invalid")
    response_keys = response.keys()
    assert "Error" in response_keys
    # Print the reported message so it shows up in the captured test output.
    print(response["Error"])
Ejemplo n.º 8
0
def test_sagemaker_docker_model_scoring_with_default_conda_env(model, model_path, data, predicted):
    """Score a Keras model saved with ``conda_env=None`` in the container.

    The container's response for ``data[0]`` must match ``predicted`` to four
    decimal places.
    """
    mlflow.keras.save_model(keras_model=model, path=model_path, conda_env=None)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=data[0],
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
        activity_polling_timeout_seconds=500,
    )
    container_frame = pd.DataFrame(json.loads(response.content))
    container_values = container_frame.values

    np.testing.assert_array_almost_equal(container_values, predicted, decimal=4)
Ejemplo n.º 9
0
def test_input_with_mleap_flavor(spark_model_iris, model_path):
    """
    Test that the mleap model deployed in Docker container throws an error when it gets bad input.

    The model is saved with a sample input (asserting the MLeap flavor is
    recorded) and then scored with the string ``"invalid"``; the scoring call
    itself is expected to raise.
    """
    mleap_model = Model()
    sparkm.save_model(spark_model_iris.model,
                      path=model_path,
                      sample_input=spark_model_iris.training_df,
                      mlflow_model=mleap_model)
    assert mleap.FLAVOR_NAME in mleap_model.flavors
    # The call is expected to raise, so its return value is deliberately not
    # bound to a name (the previous local was never read).
    with pytest.raises(Exception):
        score_model_in_sagemaker_docker_container(
            model_path=model_path, data="invalid")
Ejemplo n.º 10
0
def test_model_deployment(spark_model_iris, model_path, spark_custom_env):
    """Score a saved Spark model in the container through the pyfunc flavor.

    The JSON response body must match ``spark_model_iris.predictions`` to four
    decimal places.
    """
    sparkm.save_model(spark_model_iris.model,
                      path=model_path,
                      conda_env=spark_custom_env)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=spark_model_iris.pandas_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME,
    )

    np.testing.assert_array_almost_equal(
        spark_model_iris.predictions,
        np.array(json.loads(response.content)),
        decimal=4,
    )
Ejemplo n.º 11
0
def test_sagemaker_docker_model_scoring_with_default_conda_env(
        spark_model_iris, model_path):
    """Score a Spark model saved with ``conda_env=None`` in the container.

    The pyfunc-flavor response must match ``spark_model_iris.predictions`` to
    four decimal places.
    """
    sparkm.save_model(spark_model_iris.model, path=model_path, conda_env=None)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=spark_model_iris.pandas_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
        flavor=mlflow.pyfunc.FLAVOR_NAME)
    container_preds = np.array(json.loads(response.content))

    np.testing.assert_array_almost_equal(
        container_preds, spark_model_iris.predictions, decimal=4)
Ejemplo n.º 12
0
def test_sagemaker_docker_model_scoring_with_default_conda_env(lgb_model, model_path):
    """Score a LightGBM model saved with ``conda_env=None`` in the container.

    The container's response must match the predictions produced by the same
    model reloaded locally through pyfunc.
    """
    mlflow.lightgbm.save_model(
        lgb_model=lgb_model.model, path=model_path, conda_env=None)
    local_model = pyfunc.load_pyfunc(model_uri=model_path)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=lgb_model.inference_dataframe,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME)
    container_preds = pd.DataFrame(json.loads(response.content))
    local_preds = pd.DataFrame(local_model.predict(lgb_model.inference_dataframe))

    pandas.testing.assert_frame_equal(
        container_preds, local_preds, check_dtype=False, check_less_precise=6)
Ejemplo n.º 13
0
def test_model_deployment(spark_model_iris, model_path, spark_custom_env):
    """Score a Spark model saved with a sample input through the mleap flavor.

    The input is sent as a split-oriented JSON string; the response body must
    match ``spark_model_iris.predictions`` to four decimal places.
    """
    mlflow.spark.save_model(spark_model_iris.model,
                            path=model_path,
                            conda_env=spark_custom_env,
                            sample_input=spark_model_iris.spark_df)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=spark_model_iris.pandas_df.to_json(orient="split"),
        content_type=mlflow.pyfunc.scoring_server.CONTENT_TYPE_JSON,
        flavor=mlflow.mleap.FLAVOR_NAME,
    )

    np.testing.assert_array_almost_equal(
        spark_model_iris.predictions,
        np.array(json.loads(response.content)),
        decimal=4,
    )
Ejemplo n.º 14
0
def test_sagemaker_docker_model_scoring_with_default_conda_env(h2o_iris_model, model_path):
    """Score an H2O model saved with ``conda_env=None`` in the container.

    Only the ``"predict"`` column is compared: the container's response must
    match the output of the same model reloaded locally through pyfunc.
    """
    mlflow.h2o.save_model(
        h2o_model=h2o_iris_model.model, path=model_path, conda_env=None)
    local_model = mlflow.pyfunc.load_pyfunc(model_path)

    response = score_model_in_sagemaker_docker_container(
        model_uri=model_path,
        data=h2o_iris_model.inference_data.as_data_frame(),
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
        flavor=mlflow.pyfunc.FLAVOR_NAME)
    container_frame = pd.DataFrame(json.loads(response.content))
    container_predict = container_frame["predict"].to_frame()

    # The inference frame is converted afresh for the local prediction.
    local_output = local_model.predict(
        h2o_iris_model.inference_data.as_data_frame())
    local_predict = local_output["predict"].to_frame()

    pandas.testing.assert_frame_equal(
        container_predict, local_predict, check_dtype=False, check_less_precise=6)
def test_model_deployment_with_default_conda_env(saved_tf_iris_model, model_path):
    """Score a TensorFlow saved model, saved with ``conda_env=None``, in the container.

    The container's response must match
    ``saved_tf_iris_model.expected_results_df``.
    """
    mlflow.tensorflow.save_model(
        tf_saved_model_dir=saved_tf_iris_model.path,
        tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
        tf_signature_def_key=saved_tf_iris_model.signature_def_key,
        path=model_path,
        conda_env=None)

    response = score_model_in_sagemaker_docker_container(
        model_path=model_path,
        data=saved_tf_iris_model.inference_df,
        content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
        flavor=mlflow.pyfunc.FLAVOR_NAME)
    container_preds = pd.DataFrame(json.loads(response.content))

    pandas.testing.assert_frame_equal(container_preds,
                                      saved_tf_iris_model.expected_results_df,
                                      check_dtype=False,
                                      check_less_precise=6)
Ejemplo n.º 16
0
def test_model_export(tmpdir):
    """Check that a Spark pipeline predicts identically through every loading path.

    Fits a ``VectorAssembler`` + ``LogisticRegression`` pipeline on the iris
    data, saves it with ``sparkm.save_model``, and asserts that the in-memory
    model's predictions equal those of the reloaded Spark model, the pyfunc
    wrapper, and the SageMaker docker container. Finally asserts that
    ``sparkm.DFS_TMP`` exists and is empty.
    """
    conda_env = os.path.join(str(tmpdir), "conda_env.yml")
    _mlflow_conda_env(
        conda_env, additional_pip_deps=["pyspark=={}".format(pyspark_version)])
    iris = datasets.load_iris()
    X = iris.data  # all feature columns are used
    y = iris.target
    pandas_df = pd.DataFrame(X, columns=iris.feature_names)
    pandas_df['label'] = pd.Series(y)
    # The Spark property is "spark.python.worker.reuse"; the previous key
    # ("spark_session.python.worker.reuse") is not a Spark configuration name.
    spark_session = pyspark.sql.SparkSession.builder \
        .config(key="spark.python.worker.reuse", value=True) \
        .master("local-cluster[2, 1, 1024]") \
        .getOrCreate()
    spark_df = spark_session.createDataFrame(pandas_df)
    model_path = tmpdir.mkdir("model")
    assembler = VectorAssembler(inputCols=iris.feature_names,
                                outputCol="features")
    lr = LogisticRegression(maxIter=50, regParam=0.1, elasticNetParam=0.8)
    pipeline = Pipeline(stages=[assembler, lr])
    # Fit the model
    model = pipeline.fit(spark_df)
    # Reference predictions from the in-memory fitted pipeline
    preds_df = model.transform(spark_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]
    sparkm.save_model(model, path=str(model_path), conda_env=conda_env)
    # 1. Reload as a native Spark model
    reloaded_model = sparkm.load_model(path=str(model_path))
    preds_df_1 = reloaded_model.transform(spark_df)
    preds1_1 = [
        x.prediction for x in preds_df_1.select("prediction").collect()
    ]
    assert preds1 == preds1_1
    # 2. Load through the pyfunc wrapper
    m = pyfunc.load_pyfunc(str(model_path))
    preds2 = m.predict(pandas_df)
    assert preds1 == preds2
    # 3. Score in the SageMaker docker container
    preds3 = score_model_in_sagemaker_docker_container(
        model_path=str(model_path), data=pandas_df)
    assert preds1 == preds3
    # The DFS temp dir must exist but contain nothing.
    assert os.path.exists(sparkm.DFS_TMP)
    print(os.listdir(sparkm.DFS_TMP))
    assert not os.listdir(sparkm.DFS_TMP)
Ejemplo n.º 17
0
def test_model_export(spark_model_iris, model_path, spark_conda_env):
    """Check that a saved Spark model predicts identically through every loading path.

    Predictions of the in-memory model on the training frame are compared with
    those of the reloaded Spark model, the pyfunc wrapper, and the SageMaker
    docker container. Finally asserts that ``sparkm.DFS_TMP`` exists and is
    not empty.
    """
    original_rows = spark_model_iris.model.transform(
        spark_model_iris.training_df).select("prediction").collect()
    preds1 = [row.prediction for row in original_rows]

    sparkm.save_model(
        spark_model_iris.model, path=model_path, conda_env=spark_conda_env)

    # 1. Reload as a native Spark model.
    reloaded_model = sparkm.load_model(path=model_path)
    reloaded_rows = reloaded_model.transform(
        spark_model_iris.training_df).select("prediction").collect()
    preds1_1 = [row.prediction for row in reloaded_rows]
    assert preds1 == preds1_1

    # 2. Load through the pyfunc wrapper.
    pyfunc_model = pyfunc.load_pyfunc(model_path)
    preds2 = pyfunc_model.predict(spark_model_iris.inference_df)
    assert preds1 == preds2

    # 3. Score in the SageMaker docker container.
    preds3 = score_model_in_sagemaker_docker_container(
        model_path=model_path, data=spark_model_iris.inference_df)
    assert preds1 == preds3

    assert os.path.exists(sparkm.DFS_TMP)
    print(os.listdir(sparkm.DFS_TMP))
    # We expect not to delete the DFS tempdir.
    assert os.listdir(sparkm.DFS_TMP)