def test_categorical_model_can_be_loaded_and_evaluated_as_pyfunc(
        saved_tf_categorical_model, model_path):
    mlflow.tensorflow.save_model(
        tf_saved_model_dir=saved_tf_categorical_model.path,
        tf_meta_graph_tags=saved_tf_categorical_model.meta_graph_tags,
        tf_signature_def_key=saved_tf_categorical_model.signature_def_key,
        path=model_path)

    pyfunc_wrapper = pyfunc.load_pyfunc(model_path)
    results_df = pyfunc_wrapper.predict(
        saved_tf_categorical_model.inference_df)
    pandas.testing.assert_frame_equal(
        results_df,
        saved_tf_categorical_model.expected_results_df,
        check_less_precise=6)
Example #2
def test_model_export(spark_model_iris, model_path, spark_custom_env):
    sparkm.save_model(spark_model_iris.model, path=model_path, conda_env=spark_custom_env)
    # 1. score and compare reloaded sparkml model
    reloaded_model = sparkm.load_model(model_uri=model_path)
    preds_df = reloaded_model.transform(spark_model_iris.spark_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]
    assert spark_model_iris.predictions == preds1
    m = pyfunc.load_pyfunc(model_path)
    # 2. score and compare reloaded pyfunc
    preds2 = m.predict(spark_model_iris.pandas_df)
    assert spark_model_iris.predictions == preds2
    # 3. score and compare reloaded pyfunc Spark udf
    preds3 = score_model_as_udf(model_uri=model_path, pandas_df=spark_model_iris.pandas_df)
    assert spark_model_iris.predictions == preds3
    assert os.path.exists(sparkm.DFS_TMP)
Example #3
def test_sklearn_model_save_load(xgb_sklearn_model, model_path):
    model = xgb_sklearn_model.model
    mlflow.xgboost.save_model(xgb_model=model, path=model_path)
    reloaded_model = mlflow.xgboost.load_model(model_uri=model_path)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_almost_equal(
        model.predict(xgb_sklearn_model.inference_dataframe),
        reloaded_model.predict(xgb_sklearn_model.inference_dataframe),
    )

    np.testing.assert_array_almost_equal(
        reloaded_model.predict(xgb_sklearn_model.inference_dataframe),
        reloaded_pyfunc.predict(xgb_sklearn_model.inference_dataframe),
    )
Example #4
 def test_model_save_load(self):
     with TempDir(chdr=True, remove_on_exit=True) as tmp:
         model_path = tmp.path("knn.pkl")
         with open(model_path, "wb") as f:
             pickle.dump(self._knn, f)
         path = tmp.path("knn")
         sklearn.save_model(self._knn, path=path)
         x = sklearn.load_model(path)
         xpred = x.predict(self._X)
         np.testing.assert_array_equal(self._knn_predict, xpred)
         # The sklearn model should also be stored as a valid pyfunc model;
         # test pyfunc compatibility.
         y = pyfunc.load_pyfunc(path)
         ypred = y.predict(self._X)
         np.testing.assert_array_equal(self._knn_predict, ypred)
Example #5
def test_model_save_load(cb_model, model_path):
    model, inference_dataframe = cb_model
    mlflow.catboost.save_model(cb_model=model, path=model_path)

    loaded_model = mlflow.catboost.load_model(model_uri=model_path)
    np.testing.assert_array_almost_equal(
        model.predict(inference_dataframe),
        loaded_model.predict(inference_dataframe),
    )

    loaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)
    np.testing.assert_array_almost_equal(
        loaded_model.predict(inference_dataframe),
        loaded_pyfunc.predict(inference_dataframe),
    )
Example #6
def test_load_pyfunc_loads_torch_model_using_pickle_module_specified_at_save_time(
        module_scoped_subclassed_model, model_path):
    custom_pickle_module = pickle

    mlflow.pytorch.save_model(path=model_path,
                              pytorch_model=module_scoped_subclassed_model,
                              conda_env=None,
                              pickle_module=custom_pickle_module)

    import_module_fn = importlib.import_module
    imported_modules = []

    def track_module_imports(module_name):
        imported_modules.append(module_name)
        return import_module_fn(module_name)

    with mock.patch("importlib.import_module") as import_mock,\
            mock.patch("torch.load") as torch_load_mock:
        import_mock.side_effect = track_module_imports
        pyfunc.load_pyfunc(model_path)

    torch_load_mock.assert_called_with(mock.ANY,
                                       pickle_module=custom_pickle_module)
    assert custom_pickle_module.__name__ in imported_modules
Example #7
def test_model_log(tmpdir):
    conda_env = os.path.join(str(tmpdir), "conda_env.yml")
    _mlflow_conda_env(
        conda_env, additional_pip_deps=["pyspark=={}".format(pyspark_version)])
    iris = datasets.load_iris()
    X = iris.data  # use all four iris features
    y = iris.target
    pandas_df = pd.DataFrame(X, columns=iris.feature_names)
    pandas_df['label'] = pd.Series(y)
    spark_session = pyspark.sql.SparkSession.builder \
        .config(key="spark_session.python.worker.reuse", value=True) \
        .master("local-cluster[2, 1, 1024]") \
        .getOrCreate()
    spark_df = spark_session.createDataFrame(pandas_df)
    model_path = tmpdir.mkdir("model")
    assembler = VectorAssembler(inputCols=iris.feature_names,
                                outputCol="features")
    lr = LogisticRegression(maxIter=50, regParam=0.1, elasticNetParam=0.8)
    pipeline = Pipeline(stages=[assembler, lr])
    # Fit the model
    model = pipeline.fit(spark_df)
    # Score the training data with the fitted pipeline
    preds_df = model.transform(spark_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]
    old_tracking_uri = tracking.get_tracking_uri()
    # should_start_run tests whether or not calling log_model() automatically starts a run.
    for should_start_run in [False, True]:
        try:
            tracking_dir = os.path.abspath(str(tmpdir.mkdir("mlruns")))
            tracking.set_tracking_uri("file://%s" % tracking_dir)
            if should_start_run:
                tracking.start_run()
            sparkm.log_model(artifact_path="model", spark_model=model)
            run_id = tracking.active_run().info.run_uuid
            x = pyfunc.load_pyfunc("model", run_id=run_id)
            preds2 = x.predict(pandas_df)
            assert preds1 == preds2
            reloaded_model = sparkm.load_model("model", run_id=run_id)
            preds_df_1 = reloaded_model.transform(spark_df)
            preds3 = [
                x.prediction
                for x in preds_df_1.select("prediction").collect()
            ]
            assert preds1 == preds3
        finally:
            tracking.end_run()
            tracking.set_tracking_uri(old_tracking_uri)
            shutil.rmtree(tracking_dir)
Example #8
def test_model_save_load(sklearn_knn_model, model_path):
    knn_model = sklearn_knn_model.model

    mlflow.sklearn.save_model(sk_model=knn_model, path=model_path)
    reloaded_knn_model = mlflow.sklearn.load_model(model_uri=model_path)
    reloaded_knn_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_equal(
        knn_model.predict(sklearn_knn_model.inference_data),
        reloaded_knn_model.predict(sklearn_knn_model.inference_data),
    )

    np.testing.assert_array_equal(
        reloaded_knn_model.predict(sklearn_knn_model.inference_data),
        reloaded_knn_pyfunc.predict(sklearn_knn_model.inference_data),
    )
Example #9
def test_sagemaker_docker_model_scoring_with_default_conda_env(lgb_model, model_path):
    mlflow.lightgbm.save_model(lgb_model=lgb_model.model, path=model_path, conda_env=None)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    scoring_response = score_model_in_sagemaker_docker_container(
            model_uri=model_path,
            data=lgb_model.inference_dataframe,
            content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON_SPLIT_ORIENTED,
            flavor=mlflow.pyfunc.FLAVOR_NAME)
    deployed_model_preds = pd.DataFrame(json.loads(scoring_response.content))

    pandas.testing.assert_frame_equal(
        deployed_model_preds,
        pd.DataFrame(reloaded_pyfunc.predict(lgb_model.inference_dataframe)),
        check_dtype=False,
        check_less_precise=6)
Example #10
def serve(model_uri, port, host, no_conda):
    """
    Serve a pyfunc model saved with MLflow by launching a webserver on the specified
    host and port. For information about the input data formats accepted by the webserver,
    see the following documentation:
    https://www.mlflow.org/docs/latest/models.html#pyfunc-deployment.
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)

    model_env_file = _load_model_env(path=local_model_path)
    if not no_conda and model_env_file is not None:
        conda_env_path = os.path.join(local_model_path, model_env_file)
        return _rerun_in_conda(conda_env_path)

    app = scoring_server.init(load_pyfunc(local_model_path))
    app.run(port=port, host=host)
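
Once the server is up, clients can score data over HTTP. Below is a minimal client sketch (not part of the examples above), assuming the server listens on 127.0.0.1:5000 and accepts split-oriented JSON on the /invocations route, matching the CONTENT_TYPE_JSON_SPLIT_ORIENTED constant used elsewhere in these examples:

import pandas as pd
import requests

# Hypothetical input frame; replace with columns matching the served model.
input_df = pd.DataFrame({"x": [1.0, 2.0], "y": [3.0, 4.0]})
response = requests.post(
    "http://127.0.0.1:5000/invocations",
    data=input_df.to_json(orient="split"),
    headers={"Content-Type": "application/json; format=pandas-split"},
)
response.raise_for_status()
print(response.json())  # model predictions
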
Example #11
def test_model_save_load(lgb_model, model_path):
    model = lgb_model.model

    mlflow.lightgbm.save_model(lgb_model=model, path=model_path)
    reloaded_model = mlflow.lightgbm.load_model(model_uri=model_path)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_almost_equal(
        model.predict(lgb_model.inference_dataframe),
        reloaded_model.predict(lgb_model.inference_dataframe),
    )

    np.testing.assert_array_almost_equal(
        reloaded_model.predict(lgb_model.inference_dataframe),
        reloaded_pyfunc.predict(lgb_model.inference_dataframe),
    )
Example #12
def test_model_save_load(pd_model, model_path):
    mlflow.paddle.save_model(pd_model=pd_model.model, path=model_path)

    reloaded_pd_model = mlflow.paddle.load_model(model_uri=model_path)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    np.testing.assert_array_almost_equal(
        pd_model.model(pd_model.inference_dataframe),
        reloaded_pyfunc.predict(pd_model.inference_dataframe),
        decimal=5,
    )

    np.testing.assert_array_almost_equal(
        reloaded_pd_model(pd_model.inference_dataframe),
        reloaded_pyfunc.predict(pd_model.inference_dataframe),
        decimal=5,
    )
Example #13
def serve(model_path, run_id, port, host, no_conda):
    """
    Serve a PythonFunction model saved with MLflow.

    If a ``run_id`` is specified, ``model-path`` is treated as an artifact path within that run;
    otherwise it is treated as a local path.
    """
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)

    model_env_file = _load_model_env(model_path)
    if not no_conda and model_env_file is not None:
        conda_env_path = os.path.join(model_path, model_env_file)
        return _rerun_in_conda(conda_env_path)

    app = scoring_server.init(load_pyfunc(model_path))
    app.run(port=port, host=host)
Example #14
def test_diviner_pyfunc_group_predict_prophet(grouped_prophet, model_path, diviner_data):

    groups = []
    for i in [0, -1]:
        key_entries = []
        for value in diviner_data.df[diviner_data.key_columns].iloc[[i]].to_dict().values():
            key_entries.append(list(value.values())[0])
        groups.append(tuple(key_entries))

    mlflow.diviner.save_model(diviner_model=grouped_prophet, path=model_path)
    loaded_pyfunc_model = pyfunc.load_pyfunc(model_uri=model_path)

    local_group_pred = grouped_prophet.predict_groups(groups=groups, horizon=10, frequency="D")
    pyfunc_conf = pd.DataFrame({"groups": [groups], "horizon": 10, "frequency": "D"}, index=[0])
    pyfunc_group_predict = loaded_pyfunc_model.predict(pyfunc_conf)

    pd.testing.assert_frame_equal(local_group_pred, pyfunc_group_predict)
Example #15
def predict(model_path, run_id, input_path, output_path):
    """
    Load a pandas DataFrame and run a python_function model saved with MLflow against it.
    The prediction results are written in CSV format to ``output_path``, or to stdout if no
    output path is given.

    If a run_id is specified, MODEL_PATH is treated as an artifact path within that run;
    otherwise it is treated as a local path.
    """
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)
    model = load_pyfunc(model_path)
    df = pandas.read_csv(input_path)
    result = model.predict(df)
    out_stream = sys.stdout
    if output_path:
        out_stream = open(output_path, 'w')
    pandas.DataFrame(data=result).to_csv(out_stream, header=False, index=False)
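
Note that when output_path is supplied above, the opened file handle is never explicitly closed. A tighter variant of the final write step, sketched with a context manager (the helper name is hypothetical, not part of the MLflow CLI):

import sys
import pandas

def _write_predictions_csv(result, output_path=None):
    # Hypothetical helper: write predictions as CSV to output_path, or to
    # stdout when no path is given, closing the file deterministically.
    if output_path:
        with open(output_path, "w") as out_stream:
            pandas.DataFrame(data=result).to_csv(out_stream, header=False, index=False)
    else:
        pandas.DataFrame(data=result).to_csv(sys.stdout, header=False, index=False)
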
Example #16
 def test_log_saved_model(self):
     with TempDir(chdr=False, remove_on_exit=True) as tmp:
         # Setting the logging such that it is in the temp folder and deleted after the test.
         old_tracking_dir = tracking.get_tracking_uri()
         tracking_dir = os.path.abspath(tmp.path("mlruns"))
         tracking.set_tracking_uri("file://%s" % tracking_dir)
         tracking.start_run()
         try:
             # Creating a dict of feature names (str) to placeholders (tensors)
             feature_spec = {}
             for name in self._feature_names:
                 feature_spec[name] = tf.placeholder("float",
                                                     name=name,
                                                     shape=[150])
             # Creating receiver function for model saving.
             receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
                 feature_spec)
             saved_model_path = tmp.path("model")
             os.makedirs(saved_model_path)
             os.makedirs(tmp.path("hello"))
             # Saving Tensorflow model.
             saved_model_path = self._dnn.export_savedmodel(
                 saved_model_path, receiver_fn).decode("utf-8")
             # Logging the Tensorflow model just saved.
             tensorflow.log_saved_model(saved_model_dir=saved_model_path,
                                        signature_def_key="predict",
                                        artifact_path=tmp.path("hello"))
             # Loading the saved Tensorflow model as a pyfunc.
             x = pyfunc.load_pyfunc(saved_model_path)
             # Predicting on the iris dataset using the pyfunc.
             xpred = x.predict(
                 pandas.DataFrame(data=self._X,
                                  columns=self._feature_names))
             saved = []
             for s in self._dnn_predict:
                 saved.append(s['predictions'])
             loaded = []
             for index, rows in xpred.iterrows():
                 loaded.append(rows)
             # Asserting that the loaded model predictions are as expected.
             np.testing.assert_array_equal(saved, loaded)
         finally:
             # Restoring the old logging location.
             tracking.end_run()
             tracking.set_tracking_uri(old_tracking_dir)
Example #17
def test_model_retrain_built_in_high_level_api(
    pd_model_built_in_high_level_api, model_path, model_retrain_path
):
    model = pd_model_built_in_high_level_api.model
    mlflow.paddle.save_model(pd_model=model, path=model_path, training=True)

    training_dataset, test_dataset = get_dataset_built_in_high_level_api()

    model_retrain = paddle.Model(UCIHousing())
    model_retrain = mlflow.paddle.load_model(model_uri=model_path, model=model_retrain)
    optim = paddle.optimizer.Adam(learning_rate=0.015, parameters=model.parameters())
    model_retrain.prepare(optim, paddle.nn.MSELoss())

    model_retrain.fit(training_dataset, epochs=6, batch_size=8, verbose=1)

    mlflow.paddle.save_model(pd_model=model_retrain, path=model_retrain_path, training=False)

    with pytest.raises(TypeError, match="This model can't be loaded"):
        mlflow.paddle.load_model(model_uri=model_retrain_path, model=model_retrain)

    error_model = 0
    error_model_type = type(error_model)
    with pytest.raises(
        TypeError,
        match="Invalid object type `{}` for `model`, must be `paddle.Model`".format(
            error_model_type
        ),
    ):
        mlflow.paddle.load_model(model_uri=model_retrain_path, model=error_model)

    reloaded_pd_model = mlflow.paddle.load_model(model_uri=model_retrain_path)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_retrain_path)
    low_level_test_dataset = [x[0] for x in test_dataset]

    np.testing.assert_array_almost_equal(
        np.array(model_retrain.predict(test_dataset)).squeeze(),
        np.array(reloaded_pyfunc.predict(np.array(low_level_test_dataset))).squeeze(),
        decimal=5,
    )

    np.testing.assert_array_almost_equal(
        np.array(reloaded_pd_model(np.array(low_level_test_dataset))).squeeze(),
        np.array(reloaded_pyfunc.predict(np.array(low_level_test_dataset))).squeeze(),
        decimal=5,
    )
Example #18
    def test_spark_udf(self):
        pandas_df = self._pandas_df
        spark_df = self.spark.createDataFrame(pandas_df)
        pyfunc_udf = spark_udf(self.spark,
                               self._model_path,
                               result_type="integer")
        new_df = spark_df.withColumn("prediction",
                                     pyfunc_udf(*self._pandas_df.columns))
        spark_results = new_df.collect()

        # Compare against directly running the model.
        direct_model = load_pyfunc(self._model_path)
        pandas_results = direct_model.predict(pandas_df)
        self.assertEqual(178, len(pandas_results))
        self.assertEqual(178, len(spark_results))
        for i in range(0, len(pandas_results)):  # noqa
            self.assertEqual(self._predict[i], pandas_results[i])
            self.assertEqual(pandas_results[i], spark_results[i]['prediction'])
Example #19
def test_gbt():
    old_uri = tracking.get_tracking_uri()
    with TempDir(chdr=False, remove_on_exit=True) as tmp:
        try:
            diamonds = tmp.path("diamonds")
            artifacts = tmp.path("artifacts")
            os.mkdir(diamonds)
            os.mkdir(artifacts)
            tracking.set_tracking_uri(artifacts)
            mlflow.set_experiment("test-experiment")
            # Download the diamonds dataset via mlflow run
            run(".", entry_point="main", version=None,
                parameters={"dest-dir": diamonds},
                mode="local", cluster_spec=None, git_username=None, git_password=None,
                use_conda=True, storage_dir=None)

            # Run the main gbt app via mlflow
            submitted_run = run(
                "apps/gbt-regression", entry_point="main", version=None,
                parameters={"train": os.path.join(diamonds, "train_diamonds.parquet"),
                            "test": os.path.join(diamonds, "test_diamonds.parquet"),
                            "n-trees": 10,
                            "m-depth": 3,
                            "learning-rate": .1,
                            "loss": "rmse",
                            "label-col": "price"},
                mode="local",
                cluster_spec=None, git_username=None, git_password=None, use_conda=True,
                storage_dir=None)

            pyfunc = load_pyfunc("model", run_id=submitted_run.run_id)
            df = pandas.read_parquet(os.path.join(diamonds, "test_diamonds.parquet"))

            # Removing the price column from the DataFrame so we can use the features to predict
            df = df.drop(columns="price")

            # Predicting from the saved pyfunc
            predict = pyfunc.predict(df)

            # Make sure the data is of the right type
            assert isinstance(predict[0], numpy.float32)
        finally:
            tracking.set_tracking_uri(old_uri)
Example #20
    def get_or_load(archive_path):
        """Given a path returned by add_local_model(), this method will return a tuple of
        (loaded_model, local_model_path).
        If this Python process ever loaded the model before, we will reuse that copy.
        """
        if archive_path in SparkModelCache._models:
            SparkModelCache._cache_hits += 1
            return SparkModelCache._models[archive_path]

        local_model_dir = _SparkDirectoryDistributor.get_or_extract(
            archive_path)

        # We must rely on a supposed cyclic import here because we want this behavior
        # on the Spark Executors (i.e., don't try to pickle the load_model function).
        from mlflow.pyfunc import load_pyfunc  # pylint: disable=cyclic-import

        SparkModelCache._models[archive_path] = (load_pyfunc(local_model_dir),
                                                 local_model_dir)
        return SparkModelCache._models[archive_path]
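
A short usage sketch of the caching contract (archive_path is assumed to come from SparkModelCache.add_local_model on the driver; this snippet is not taken from the examples above):

# The second lookup of the same archive should hit the in-process cache
# rather than re-extracting and re-loading the model from disk.
model, local_dir = SparkModelCache.get_or_load(archive_path)
model_again, _ = SparkModelCache.get_or_load(archive_path)
assert model is model_again
assert SparkModelCache._cache_hits >= 1
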
Example #21
 def helper(self, feature_spec, tmp, estimator, df):
     """
     This function handles exporting, logging, loading back, and predicting on an estimator
     for testing purposes.
     """
     receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec)
     saved_estimator_path = tmp.path("model")
     os.makedirs(saved_estimator_path)
     # Saving TensorFlow model.
     saved_estimator_path = estimator.export_savedmodel(saved_estimator_path,
                                                        receiver_fn).decode("utf-8")
     # Logging the TensorFlow model just saved.
     tensorflow.log_saved_model(saved_model_dir=saved_estimator_path,
                                signature_def_key="predict",
                                artifact_path="hello")
     # Loading the saved TensorFlow model as a pyfunc.
     x = pyfunc.load_pyfunc(saved_estimator_path)
     # Predicting on the dataset using the pyfunc.
     return x.predict(df)
Example #22
 def test_model_save_load(self):
     with TempDir() as tmp:
         model_path = tmp.path("knn.pkl")
         with open(model_path, "wb") as f:
             pickle.dump(self._knn, f)
         path = tmp.path("knn")
         m = Model(run_id="test", artifact_path="testtest")
         pyfunc.save_model(dst_path=path,
                           data_path=model_path,
                           loader_module=os.path.basename(__file__)[:-3],
                           code_path=[__file__],
                           model=m)
         m2 = Model.load(os.path.join(path, "MLmodel"))
         print("m1", m.__dict__)
         print("m2", m2.__dict__)
         assert m.__dict__ == m2.__dict__
         x = pyfunc.load_pyfunc(path)
         xpred = x.predict(self._X)
         np.testing.assert_array_equal(self._knn_predict, xpred)
Example #23
    def get_or_load(archive_path):
        """Given a path returned by add_local_model(), this method will return the loaded model.
        If this Python process ever loaded the model before, we will reuse that copy.
        """
        if archive_path in SparkModelCache._models:
            SparkModelCache._cache_hits += 1
            return SparkModelCache._models[archive_path]

        local_path = SparkFiles.get(archive_path)
        temp_dir = tempfile.mkdtemp()
        zip_ref = zipfile.ZipFile(local_path, 'r')
        zip_ref.extractall(temp_dir)
        zip_ref.close()

        # We must rely on a supposed cyclic import here because we want this behavior
        # on the Spark Executors (i.e., don't try to pickle the load_model function).
        from mlflow.pyfunc import load_pyfunc  # pylint: disable=cyclic-import
        SparkModelCache._models[archive_path] = load_pyfunc(temp_dir)
        return SparkModelCache._models[archive_path]
Example #24
def predict(model_uri, input_path, output_path, no_conda):
    """
    Load a pandas DataFrame and run a python_function model saved with MLflow against it.
    The prediction results are written in CSV format to ``output_path``, or to stdout if no
    output path is given.
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)

    model_env_file = _load_model_env(path=local_model_path)
    if not no_conda and model_env_file is not None:
        conda_env_path = os.path.join(local_model_path, model_env_file)
        return _rerun_in_conda(conda_env_path)

    model = load_pyfunc(local_model_path)
    df = pandas.read_csv(input_path)
    result = model.predict(df)
    out_stream = sys.stdout
    if output_path:
        out_stream = open(output_path, 'w')
    pandas.DataFrame(data=result).to_csv(out_stream, header=False, index=False)
Example #25
def main(argv):
    # Builds, trains and evaluates a tf.estimator. Then, exports it for inference, logs the exported model
    # with MLflow, and loads the fitted model back as a PyFunc to make predictions.
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data()
    # There are 13 features we are using for inference.
    feat_cols = [tf.feature_column.numeric_column(key="features", shape=(x_train.shape[1],))]
    feat_spec = {
        "features": tf.placeholder("float", name="features", shape=[None, x_train.shape[1]])}
    hidden_units = [50, 20]
    steps = 1000
    regressor = tf.estimator.DNNRegressor(hidden_units=hidden_units, feature_columns=feat_cols)
    train_input_fn = tf.estimator.inputs.numpy_input_fn({"features": x_train}, y_train,
                                                        num_epochs=None, shuffle=True)
    with mlflow.start_run() as run:
        print(run)
        mlflow.log_param("Hidden Units", hidden_units)
        mlflow.log_param("Steps", steps)
        regressor.train(train_input_fn, steps=steps)
        test_input_fn = tf.estimator.inputs.numpy_input_fn({"features": x_test}, y_test,
                                                           num_epochs=None, shuffle=True)
        # Compute mean squared error
        mse = regressor.evaluate(test_input_fn, steps=steps)
        mlflow.log_metric("Mean Square Error", mse['average_loss'])
        # Building a receiver function for exporting
        receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feat_spec)
        temp = tempfile.mkdtemp()
        try:
            saved_estimator_path = regressor.export_savedmodel(temp, receiver_fn).decode("utf-8")
            # Logging the saved model
            mlflow.tensorflow.log_model(tf_saved_model_dir=saved_estimator_path,
                                        tf_meta_graph_tags=[tag_constants.SERVING],
                                        tf_signature_def_key="predict",
                                        artifact_path="model")
            # Reloading the model
            pyfunc_model = pyfunc.load_pyfunc(mlflow.get_artifact_uri('model'))
            df = pd.DataFrame(data=x_test, columns=["features"] * x_train.shape[1])
            # Predicting on the loaded Python Function
            predict_df = pyfunc_model.predict(df)
            predict_df['original_labels'] = y_test
            print(predict_df)
        finally:
            shutil.rmtree(temp)
Example #26
def serve(model_path, run_id, port, host, no_conda):
    """
    Serve a pyfunc model saved with MLflow by launching a webserver on the specified
    host and port. For information about the input data formats accepted by the webserver,
    see the following documentation:
    https://www.mlflow.org/docs/latest/models.html#pyfunc-deployment.

    If a ``run_id`` is specified, ``model-path`` is treated as an artifact path within that run;
    otherwise it is treated as a local path.
    """
    if run_id:
        model_path = _get_model_log_dir(model_path, run_id)

    model_env_file = _load_model_env(model_path)
    if not no_conda and model_env_file is not None:
        conda_env_path = os.path.join(model_path, model_env_file)
        return _rerun_in_conda(conda_env_path)

    app = scoring_server.init(load_pyfunc(model_path))
    app.run(port=port, host=host)
Example #27
def test_load_model_succeeds_when_data_is_model_file_instead_of_directory(
    module_scoped_subclassed_model, model_path, data
):
    """
    This test verifies that PyTorch models saved in older versions of MLflow are loaded successfully
    by ``mlflow.pytorch.load_model``. The ``data`` path associated with these older models is
    a serialized PyTorch model file, as opposed to the current format: a directory containing a
    serialized model file and pickle module information.
    """
    artifact_path = "pytorch_model"
    with mlflow.start_run():
        mlflow.pytorch.log_model(
            artifact_path=artifact_path,
            pytorch_model=module_scoped_subclassed_model,
            conda_env=None,
        )
        model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
            )
        )

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    pyfunc_conf = model_conf.flavors.get(pyfunc.FLAVOR_NAME)
    assert pyfunc_conf is not None
    model_data_path = os.path.join(model_path, pyfunc_conf[pyfunc.DATA])
    assert os.path.exists(model_data_path)
    assert mlflow.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME in os.listdir(model_data_path)
    pyfunc_conf[pyfunc.DATA] = os.path.join(
        model_data_path, mlflow.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME
    )
    model_conf.save(model_conf_path)

    loaded_pyfunc = pyfunc.load_pyfunc(model_path)

    np.testing.assert_array_almost_equal(
        loaded_pyfunc.predict(data[0]),
        pd.DataFrame(_predict(model=module_scoped_subclassed_model, data=data)),
        decimal=4,
    )
Example #28
def test_model_save_load_built_in_high_level_api(pd_model_built_in_high_level_api, model_path):
    model = pd_model_built_in_high_level_api.model
    test_dataset = pd_model_built_in_high_level_api.inference_dataframe
    mlflow.paddle.save_model(pd_model=model, path=model_path)

    reloaded_pd_model = mlflow.paddle.load_model(model_uri=model_path)
    reloaded_pyfunc = pyfunc.load_pyfunc(model_uri=model_path)

    low_level_test_dataset = [x[0] for x in test_dataset]

    np.testing.assert_array_almost_equal(
        np.array(model.predict(test_dataset)).squeeze(),
        np.array(reloaded_pyfunc.predict(np.array(low_level_test_dataset))).squeeze(),
        decimal=5,
    )

    np.testing.assert_array_almost_equal(
        np.array(reloaded_pd_model(np.array(low_level_test_dataset))).squeeze(),
        np.array(reloaded_pyfunc.predict(np.array(low_level_test_dataset))).squeeze(),
        decimal=5,
    )
Example #29
def test_model_export(tmpdir):
    conda_env = os.path.join(str(tmpdir), "conda_env.yml")
    _mlflow_conda_env(
        conda_env, additional_pip_deps=["pyspark=={}".format(pyspark_version)])
    iris = datasets.load_iris()
    X = iris.data  # use all four iris features
    y = iris.target
    pandas_df = pd.DataFrame(X, columns=iris.feature_names)
    pandas_df['label'] = pd.Series(y)
    spark_session = pyspark.sql.SparkSession.builder \
        .config(key="spark_session.python.worker.reuse", value=True) \
        .master("local-cluster[2, 1, 1024]") \
        .getOrCreate()
    spark_df = spark_session.createDataFrame(pandas_df)
    model_path = tmpdir.mkdir("model")
    assembler = VectorAssembler(inputCols=iris.feature_names,
                                outputCol="features")
    lr = LogisticRegression(maxIter=50, regParam=0.1, elasticNetParam=0.8)
    pipeline = Pipeline(stages=[assembler, lr])
    # Fit the model
    model = pipeline.fit(spark_df)
    # Score the training data with the fitted pipeline
    preds_df = model.transform(spark_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]
    sparkm.save_model(model, path=str(model_path), conda_env=conda_env)
    reloaded_model = sparkm.load_model(path=str(model_path))
    preds_df_1 = reloaded_model.transform(spark_df)
    preds1_1 = [
        x.prediction for x in preds_df_1.select("prediction").collect()
    ]
    assert preds1 == preds1_1
    m = pyfunc.load_pyfunc(str(model_path))
    preds2 = m.predict(pandas_df)
    assert preds1 == preds2
    preds3 = score_model_in_sagemaker_docker_container(
        model_path=str(model_path), data=pandas_df)
    assert preds1 == preds3
    assert os.path.exists(sparkm.DFS_TMP)
    print(os.listdir(sparkm.DFS_TMP))
    assert not os.listdir(sparkm.DFS_TMP)
Example #30
def test_model_export(spark_model_iris, model_path, spark_conda_env):
    preds_df = spark_model_iris.model.transform(spark_model_iris.training_df)
    preds1 = [x.prediction for x in preds_df.select("prediction").collect()]
    sparkm.save_model(spark_model_iris.model,
                      path=model_path,
                      conda_env=spark_conda_env)
    reloaded_model = sparkm.load_model(path=model_path)
    preds_df_1 = reloaded_model.transform(spark_model_iris.training_df)
    preds1_1 = [
        x.prediction for x in preds_df_1.select("prediction").collect()
    ]
    assert preds1 == preds1_1
    m = pyfunc.load_pyfunc(model_path)
    preds2 = m.predict(spark_model_iris.inference_df)
    assert preds1 == preds2
    preds3 = score_model_in_sagemaker_docker_container(
        model_path=model_path, data=spark_model_iris.inference_df)
    assert preds1 == preds3
    assert os.path.exists(sparkm.DFS_TMP)
    print(os.listdir(sparkm.DFS_TMP))
    # We expect not to delete the DFS tempdir.
    assert os.listdir(sparkm.DFS_TMP)