def serve(
    self,
    model_uri,
    port,
    host,
    timeout,
    enable_mlserver,
    synchronous=True,
    stdout=None,
    stderr=None,
):
    """
    Serve an R model locally.

    NOTE: The `enable_mlserver` parameter is there to comply with the FlavorBackend
    interface, but serving R models through MLServer is not supported yet:
    https://github.com/SeldonIO/MLServer/issues/183
    """
    if enable_mlserver:
        raise Exception("The MLServer inference server is not yet supported in the R backend.")
    if timeout:
        raise Exception("Timeout is not yet supported in the R backend.")
    if not synchronous:
        raise Exception("RBackend does not support call with synchronous=False")
    if stdout is not None or stderr is not None:
        raise Exception("RBackend does not support redirecting stdout/stderr.")

    model_path = _download_artifact_from_uri(model_uri)
    command = "mlflow::mlflow_rfunc_serve('{0}', port = {1}, host = '{2}')".format(
        quote(model_path), port, host
    )
    _execute(command)
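# A minimal usage sketch for the R backend above. The backend is normally not
# constructed by hand; dispatch happens through the `mlflow models serve` CLI,
# which selects this backend for models logged with the R "crate" flavor. The
# model URI, host, and port below are illustrative:
#
#   mlflow models serve -m "models:/MyRModel/1" -h 127.0.0.1 -p 5001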
def test_model_log(lgb_model, model_path):
    old_uri = mlflow.get_tracking_uri()
    model = lgb_model.model
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "model"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["lightgbm"])

                mlflow.lightgbm.log_model(
                    lgb_model=model, artifact_path=artifact_path, conda_env=conda_env
                )
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
                )

                reloaded_model = mlflow.lightgbm.load_model(model_uri=model_uri)
                np.testing.assert_array_almost_equal(
                    model.predict(lgb_model.inference_dataframe),
                    reloaded_model.predict(lgb_model.inference_dataframe),
                )

                model_path = _download_artifact_from_uri(artifact_uri=model_uri)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)
def test_log_model_persists_specified_conda_env_in_mlflow_model_directory(
    sklearn_knn_model, main_scoped_model_class, pyfunc_custom_env
):
    sklearn_artifact_path = "sk_model"
    with mlflow.start_run():
        mlflow.sklearn.log_model(sk_model=sklearn_knn_model, artifact_path=sklearn_artifact_path)
        sklearn_run_id = mlflow.active_run().info.run_id

    pyfunc_artifact_path = "pyfunc_model"
    with mlflow.start_run():
        mlflow.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            artifacts={
                "sk_model": utils_get_artifact_uri(
                    artifact_path=sklearn_artifact_path, run_id=sklearn_run_id
                )
            },
            python_model=main_scoped_model_class(predict_fn=None),
            conda_env=pyfunc_custom_env,
        )
        pyfunc_model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=pyfunc_artifact_path
            )
        )

    pyfunc_conf = _get_flavor_configuration(
        model_path=pyfunc_model_path, flavor_name=mlflow.pyfunc.FLAVOR_NAME
    )
    saved_conda_env_path = os.path.join(pyfunc_model_path, pyfunc_conf[mlflow.pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != pyfunc_custom_env

    with open(pyfunc_custom_env, "r") as f:
        pyfunc_custom_env_parsed = yaml.safe_load(f)
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_parsed = yaml.safe_load(f)
    assert saved_conda_env_parsed == pyfunc_custom_env_parsed
def test_log_model_built_in_high_level_api(pd_model_built_in_high_level_api, model_path, tmpdir):
    model = pd_model_built_in_high_level_api.model
    _, test_dataset = get_dataset_built_in_high_level_api()

    try:
        artifact_path = "model"
        conda_env = os.path.join(tmpdir, "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["paddle"])

        mlflow.paddle.log_model(
            pd_model=model, artifact_path=artifact_path, conda_env=conda_env, training=True
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )

        model_retrain = paddle.Model(UCIHousing())
        optim = paddle.optimizer.Adam(learning_rate=0.015, parameters=model.parameters())
        model_retrain.prepare(optim, paddle.nn.MSELoss())
        model_retrain = mlflow.paddle.load_model(model_uri=model_uri, model=model_retrain)

        np.testing.assert_array_almost_equal(
            np.array(model.predict(test_dataset)).squeeze(),
            np.array(model_retrain.predict(test_dataset)).squeeze(),
            decimal=5,
        )

        model_path = _download_artifact_from_uri(artifact_uri=model_uri)
        model_config = Model.load(os.path.join(model_path, "MLmodel"))
        assert pyfunc.FLAVOR_NAME in model_config.flavors
        assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
        env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
        assert os.path.exists(os.path.join(model_path, env_path))
    finally:
        mlflow.end_run()
def load_model(model_uri, ctx):
    """
    Load a Gluon model from a local file or a run.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param ctx: Either CPU or GPU.

    :return: A Gluon model instance.

    .. code-block:: python
        :caption: Example

        # Load persisted model as a Gluon model, make inferences against an NDArray
        model = mlflow.gluon.load_model("runs:/" + gluon_random_data_run.info.run_id + "/model")
        model(nd.array(np.random.rand(1000, 1, 32)))
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    model_arch_path = os.path.join(local_model_path, "data", _MODEL_SAVE_PATH) + "-symbol.json"
    model_params_path = os.path.join(local_model_path, "data", _MODEL_SAVE_PATH) + "-0000.params"
    symbol = sym.load(model_arch_path)
    inputs = sym.var("data", dtype="float32")
    net = gluon.SymbolBlock(symbol, inputs)
    net.collect_params().load(model_params_path, ctx)
    return net
def load_model(model_uri, **kwargs):
    """
    Load a Keras model from a local file or a run.

    Extra arguments are passed through to keras.load_model.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.

    :return: A Keras model instance.

    .. code-block:: python
        :caption: Example

        # Load persisted model as a Keras model or as a PyFunc, call predict() on a pandas DataFrame
        keras_model = mlflow.keras.load_model("runs:/96771d893a5e46159d9f3b49bf9013e2" + "/models")
        predictions = keras_model.predict(x_test)
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    keras_module = importlib.import_module(flavor_conf.get("keras_module", "keras"))
    keras_model_artifacts_path = os.path.join(
        local_model_path, flavor_conf.get("data", _MODEL_SAVE_PATH)
    )
    return _load_model(model_path=keras_model_artifacts_path, keras_module=keras_module, **kwargs)
def test_sklearn_log_explainer():
    """
    Tests mlflow.shap log_explainer with mlflow serialization of the underlying model
    """
    with mlflow.start_run() as run:
        run_id = run.info.run_id

        X, y = shap.datasets.boston()
        model = sklearn.ensemble.RandomForestRegressor(n_estimators=100)
        model.fit(X, y)

        explainer_original = shap.Explainer(model.predict, X, algorithm="permutation")
        shap_values_original = explainer_original(X[:5])

        mlflow.shap.log_explainer(explainer_original, "test_explainer")

        explainer_uri = "runs:/" + run_id + "/test_explainer"
        explainer_loaded = mlflow.shap.load_explainer(explainer_uri)
        shap_values_new = explainer_loaded(X[:5])

        explainer_path = _download_artifact_from_uri(artifact_uri=explainer_uri)
        flavor_conf = _get_flavor_configuration(
            model_path=explainer_path, flavor_name=mlflow.shap.FLAVOR_NAME
        )
        underlying_model_flavor = flavor_conf["underlying_model_flavor"]
        assert underlying_model_flavor == mlflow.sklearn.FLAVOR_NAME

        np.testing.assert_array_equal(
            shap_values_original.base_values, shap_values_new.base_values
        )
        np.testing.assert_allclose(
            shap_values_original.values, shap_values_new.values, rtol=100, atol=100
        )
def load_model(model_uri: str, suppress_warnings: bool = True) -> PyFuncModel:
    """
    Load a model stored in Python function format.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param suppress_warnings: If ``True``, non-fatal warning messages associated with the model
                              loading process will be suppressed. If ``False``, these warning
                              messages will be emitted.
    """
    local_path = _download_artifact_from_uri(artifact_uri=model_uri)
    model_meta = Model.load(os.path.join(local_path, MLMODEL_FILE_NAME))

    conf = model_meta.flavors.get(FLAVOR_NAME)
    if conf is None:
        raise MlflowException(
            'Model does not have the "{flavor_name}" flavor'.format(flavor_name=FLAVOR_NAME),
            RESOURCE_DOES_NOT_EXIST,
        )
    model_py_version = conf.get(PY_VERSION)
    if not suppress_warnings:
        _warn_potentially_incompatible_py_version_if_necessary(model_py_version=model_py_version)
    if CODE in conf and conf[CODE]:
        code_path = os.path.join(local_path, conf[CODE])
        mlflow.pyfunc.utils._add_code_to_system_path(code_path=code_path)
    data_path = os.path.join(local_path, conf[DATA]) if (DATA in conf) else local_path
    model_impl = importlib.import_module(conf[MAIN])._load_pyfunc(data_path)
    return PyFuncModel(model_meta=model_meta, model_impl=model_impl)
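# A short usage sketch for the pyfunc loader above; the run ID, artifact path,
# and input columns are placeholders for a previously logged model.
import pandas as pd

import mlflow.pyfunc

pyfunc_model = mlflow.pyfunc.load_model("runs:/<mlflow_run_id>/model")
predictions = pyfunc_model.predict(pd.DataFrame({"x": [1.0], "y": [2.0]}))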
def load_model(model_uri, dst_path=None):
    """
    Load an XGBoost model from a local file or a run.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
                      artifact-locations>`_.
    :param dst_path: The local filesystem path to which to download the model artifact.
                     This directory must already exist. If unspecified, a local output
                     path will be created.

    :return: An XGBoost model. An instance of either `xgboost.Booster`_ or XGBoost scikit-learn
             models, depending on the saved model class specification.
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri, output_path=dst_path)
    return _load_model(path=local_model_path)
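# A usage sketch for the XGBoost loader above. Per the docstring, `dst_path`
# must already exist when supplied; the model URI is a placeholder.
import os

import mlflow.xgboost

os.makedirs("/tmp/xgb_model", exist_ok=True)
model = mlflow.xgboost.load_model("runs:/<mlflow_run_id>/model", dst_path="/tmp/xgb_model")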
def load_model(model_uri):
    """
    Load a fastai model from a local file or a run.

    :param model_uri: The location, in URI format, of the MLflow model. For example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/tracking.html#
                      artifact-locations>`_.

    :return: A fastai model (an instance of `fastai.Learner`_).
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    model_file_path = os.path.join(local_model_path, flavor_conf.get("data", "model.fastai"))
    return _load_model(path=model_file_path)
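# A usage sketch for the fastai loader above (the model URI is a placeholder);
# the returned object is a fastai Learner, so the usual Learner APIs apply.
import mlflow.fastai

learner = mlflow.fastai.load_model("runs:/<mlflow_run_id>/model")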
def serve(self, model_uri, port, host, enable_mlserver):  # pylint: disable=W0221
    """
    Serve pyfunc model locally.
    """
    local_path = _download_artifact_from_uri(model_uri)

    server_implementation = mlserver if enable_mlserver else scoring_server
    command, command_env = server_implementation.get_cmd(local_path, port, host, self._nworkers)

    if not self._no_conda and ENV in self._config:
        conda_env_path = os.path.join(local_path, self._config[ENV])
        return _execute_in_conda_env(
            conda_env_path, command, self._install_mlflow, command_env=command_env
        )
    else:
        _logger.info("=== Running command '%s'", command)
        if os.name != "nt":
            subprocess.Popen(["bash", "-c", command], env=command_env).wait()
        else:
            subprocess.Popen(command, env=command_env).wait()
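# The method above is typically reached through the `mlflow models serve` CLI.
# The `--enable-mlserver` spelling of the MLServer toggle is an assumption;
# check `mlflow models serve --help` for the exact flag in your version:
#
#   mlflow models serve -m runs:/<run_id>/model -h 127.0.0.1 -p 5000 --enable-mlserver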
def copy_model_into_container(dockerfile_context_dir):
    model_cwd = os.path.join(dockerfile_context_dir, "model_dir")
    os.mkdir(model_cwd)
    model_path = _download_artifact_from_uri(model_uri, output_path=model_cwd)
    return """
        COPY {model_dir} /opt/ml/model
        RUN python -c \
        'from mlflow.models.container import _install_pyfunc_deps;\
        _install_pyfunc_deps(\
            "/opt/ml/model", \
            install_mlflow={install_mlflow}, \
            enable_mlserver={enable_mlserver})'
        ENV {disable_env}="true"
        ENV {ENABLE_MLSERVER}={enable_mlserver}
        """.format(
        disable_env=DISABLE_ENV_CREATION,
        model_dir=str(posixpath.join("model_dir", os.path.basename(model_path))),
        install_mlflow=repr(install_mlflow),
        ENABLE_MLSERVER=ENABLE_MLSERVER,
        enable_mlserver=repr(enable_mlserver),
    )
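# For illustration only: with install_mlflow=False, enable_mlserver=False, and
# a downloaded model directory named "model", the returned fragment renders
# roughly as below (the ENV variable names come from the DISABLE_ENV_CREATION
# and ENABLE_MLSERVER constants, whose values are not shown here):
#
#   COPY model_dir/model /opt/ml/model
#   RUN python -c 'from mlflow.models.container import _install_pyfunc_deps;\
#       _install_pyfunc_deps("/opt/ml/model", install_mlflow=False, enable_mlserver=False)'
#   ENV <DISABLE_ENV_CREATION>="true"
#   ENV <ENABLE_MLSERVER>=False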
def test_model_log_persists_specified_conda_env_in_mlflow_model_directory(model, keras_custom_env):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.keras.log_model(
            keras_model=model, artifact_path=artifact_path, conda_env=keras_custom_env
        )
        model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
            )
        )

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != keras_custom_env

    with open(keras_custom_env, "r") as f:
        keras_custom_env_parsed = yaml.safe_load(f)
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_parsed = yaml.safe_load(f)
    assert saved_conda_env_parsed == keras_custom_env_parsed
def test_log_model(cb_model, tmpdir):
    model, inference_dataframe = cb_model
    with mlflow.start_run():
        artifact_path = "model"
        conda_env = os.path.join(tmpdir.strpath, "conda_env.yaml")
        _mlflow_conda_env(conda_env, additional_pip_deps=["catboost"])

        model_info = mlflow.catboost.log_model(model, artifact_path, conda_env=conda_env)
        model_uri = "runs:/{}/{}".format(mlflow.active_run().info.run_id, artifact_path)
        assert model_info.model_uri == model_uri

        loaded_model = mlflow.catboost.load_model(model_uri)
        np.testing.assert_array_almost_equal(
            model.predict(inference_dataframe),
            loaded_model.predict(inference_dataframe),
        )

        local_path = _download_artifact_from_uri(model_uri)
        model_config = Model.load(os.path.join(local_path, "MLmodel"))
        assert pyfunc.FLAVOR_NAME in model_config.flavors
        assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
        env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
        assert os.path.exists(os.path.join(local_path, env_path))
def test_mleap_model_log(spark_model_iris):
    artifact_path = "model"
    register_model_patch = mock.patch("mlflow.register_model")
    with mlflow.start_run(), register_model_patch:
        mlflow.spark.log_model(
            spark_model=spark_model_iris.model,
            sample_input=spark_model_iris.spark_df,
            artifact_path=artifact_path,
            registered_model_name="Model1",
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )
        mlflow.register_model.assert_called_once_with(
            model_uri, "Model1", await_registration_for=DEFAULT_AWAIT_MAX_SLEEP_SECONDS
        )

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    config_path = os.path.join(model_path, "MLmodel")
    mlflow_model = Model.load(config_path)
    assert mlflow.spark.FLAVOR_NAME in mlflow_model.flavors
    assert mlflow.mleap.FLAVOR_NAME in mlflow_model.flavors
def test_model_log_without_specified_conda_env_uses_default_env_with_expected_dependencies(
    sklearn_knn_model,
):
    artifact_path = "model"
    knn_model = sklearn_knn_model.model
    with mlflow.start_run():
        mlflow.sklearn.log_model(
            sk_model=knn_model,
            artifact_path=artifact_path,
            conda_env=None,
            serialization_format=mlflow.sklearn.SERIALIZATION_FORMAT_PICKLE,
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    with open(conda_env_path, "r") as f:
        conda_env = yaml.safe_load(f)

    assert conda_env == mlflow.sklearn.get_default_conda_env()
def maybe_copy_from_uri(cls, src_uri, dst_path):
    """
    Conditionally copy the file from the source URI to the Hadoop DFS.
    If the file is already on the Hadoop DFS, do nothing.

    :return: If copied, the new target location; otherwise, the source URI.
    """
    try:
        # makeQualified throws if wrong schema / uri
        dfs_path = cls._fs().makeQualified(cls._remote_path(src_uri))
        if cls._try_file_exists(dfs_path):
            _logger.info("File '%s' is already on DFS, copy is not necessary.", src_uri)
            return src_uri
    except Exception:
        _logger.info("URI '%s' does not point to the current DFS.", src_uri)
    _logger.info("File '%s' not found on DFS. Will attempt to upload the file.", src_uri)
    return cls.maybe_copy_from_local_file(_download_artifact_from_uri(src_uri), dst_path)
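# A hypothetical call sketch for maybe_copy_from_uri above (the owning class
# name is illustrative, not part of the source): copy a model from S3 to the
# DFS only when it is not already there.
#
#   target = HadoopFs.maybe_copy_from_uri(
#       "s3://my_bucket/path/to/model", "/user/me/models/model"
#   )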
def test_model_log_load(sklearn_knn_model, iris_data, tmpdir):
    sk_model_path = os.path.join(str(tmpdir), "knn.pkl")
    with open(sk_model_path, "wb") as f:
        pickle.dump(sklearn_knn_model, f)

    pyfunc_artifact_path = "pyfunc_model"
    with mlflow.start_run():
        mlflow.pyfunc.log_model(
            artifact_path=pyfunc_artifact_path,
            data_path=sk_model_path,
            loader_module=os.path.basename(__file__)[:-3],
            code_path=[__file__],
        )
        pyfunc_model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=pyfunc_artifact_path
            )
        )

    model_config = Model.load(os.path.join(pyfunc_model_path, "MLmodel"))
    assert mlflow.pyfunc.FLAVOR_NAME in model_config.flavors
    assert mlflow.pyfunc.PY_VERSION in model_config.flavors[mlflow.pyfunc.FLAVOR_NAME]
    reloaded_model = mlflow.pyfunc.load_pyfunc(pyfunc_model_path)
    np.testing.assert_array_equal(
        sklearn_knn_model.predict(iris_data[0]), reloaded_model.predict(iris_data[0])
    )
def test_model_log_persists_specified_conda_env_in_mlflow_model_directory(
    h2o_iris_model, h2o_custom_env
):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.h2o.log_model(
            h2o_model=h2o_iris_model.model, artifact_path=artifact_path, conda_env=h2o_custom_env
        )
        model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
            )
        )

    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    saved_conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    assert os.path.exists(saved_conda_env_path)
    assert saved_conda_env_path != h2o_custom_env

    with open(h2o_custom_env, "r") as f:
        h2o_custom_env_text = f.read()
    with open(saved_conda_env_path, "r") as f:
        saved_conda_env_text = f.read()
    assert saved_conda_env_text == h2o_custom_env_text
def serve(self, model_uri, port, host):
    """
    Serve pyfunc model locally.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(local_path)
    command = (
        "gunicorn --timeout=60 -b {host}:{port} -w {nworkers} ${{GUNICORN_CMD_ARGS}}"
        " -- mlflow.pyfunc.scoring_server.wsgi:app"
    ).format(host=host, port=port, nworkers=self._nworkers)
    command_env = os.environ.copy()
    command_env[scoring_server._SERVER_MODEL_PATH] = local_uri
    if not self._no_conda and ENV in self._config:
        conda_env_path = os.path.join(local_path, self._config[ENV])
        return _execute_in_conda_env(
            conda_env_path, command, self._install_mlflow, command_env=command_env
        )
    else:
        _logger.info("=== Running command '%s'", command)
        subprocess.Popen(["bash", "-c", command], env=command_env).wait()
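# The ${GUNICORN_CMD_ARGS} expansion above lets callers pass extra gunicorn
# flags through the environment when serving; the values here are illustrative:
#
#   export GUNICORN_CMD_ARGS="--log-level debug --worker-tmp-dir /dev/shm"
#   mlflow models serve -m runs:/<run_id>/model -h 0.0.0.0 -p 5000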
def test_log_model_without_specified_conda_env_uses_default_env_with_expected_dependencies(
    saved_tf_iris_model,
):
    artifact_path = "model"
    with mlflow.start_run():
        mlflow.tensorflow.log_model(
            tf_saved_model_dir=saved_tf_iris_model.path,
            tf_meta_graph_tags=saved_tf_iris_model.meta_graph_tags,
            tf_signature_def_key=saved_tf_iris_model.signature_def_key,
            artifact_path=artifact_path,
            conda_env=None,
        )
        model_uri = "runs:/{run_id}/{artifact_path}".format(
            run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
        )

    model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    pyfunc_conf = _get_flavor_configuration(model_path=model_path, flavor_name=pyfunc.FLAVOR_NAME)
    conda_env_path = os.path.join(model_path, pyfunc_conf[pyfunc.ENV])
    with open(conda_env_path, "r") as f:
        conda_env = yaml.safe_load(f)

    assert conda_env == mlflow.tensorflow.get_default_conda_env()
def test_model_log(prophet_model):
    old_uri = mlflow.get_tracking_uri()
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                mlflow.set_tracking_uri("test")
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "prophet"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["pystan", "prophet"])

                mlflow.prophet.log_model(
                    pr_model=prophet_model.model, artifact_path=artifact_path, conda_env=conda_env
                )
                model_uri = f"runs:/{mlflow.active_run().info.run_id}/{artifact_path}"

                reloaded_prophet_model = mlflow.prophet.load_model(model_uri=model_uri)
                np.testing.assert_array_equal(
                    generate_forecast(prophet_model.model, FORECAST_HORIZON),
                    generate_forecast(reloaded_prophet_model, FORECAST_HORIZON),
                )

                model_path = _download_artifact_from_uri(artifact_uri=model_uri)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))
            finally:
                mlflow.end_run()
                mlflow.set_tracking_uri(old_uri)
def test_deploy_creates_sagemaker_transform_job_and_s3_resources_with_expected_names_from_s3(
    pretrained_model, sagemaker_client
):
    local_model_path = _download_artifact_from_uri(pretrained_model.model_uri)
    artifact_path = "model"
    region_name = sagemaker_client.meta.region_name
    default_bucket = mfs._get_default_s3_bucket(region_name)
    s3_artifact_repo = S3ArtifactRepository("s3://{}".format(default_bucket))
    s3_artifact_repo.log_artifacts(local_model_path, artifact_path=artifact_path)
    model_s3_uri = "s3://{bucket_name}/{artifact_path}".format(
        bucket_name=default_bucket, artifact_path=pretrained_model.model_path
    )

    job_name = "test-job"
    mfs.deploy_transform_job(
        job_name=job_name,
        model_uri=model_s3_uri,
        s3_input_data_type="Some Data Type",
        s3_input_uri="Some Input Uri",
        content_type="Some Content Type",
        s3_output_path="Some Output Path",
        archive=True,
    )

    transform_job_description = sagemaker_client.describe_transform_job(TransformJobName=job_name)
    model_name = transform_job_description["ModelName"]
    assert model_name in [model["ModelName"] for model in sagemaker_client.list_models()["Models"]]

    s3_client = boto3.client("s3", region_name=region_name)
    object_names = [
        entry["Key"] for entry in s3_client.list_objects(Bucket=default_bucket)["Contents"]
    ]
    assert any(model_name in object_name for object_name in object_names)
    assert job_name in [
        transform_job["TransformJobName"]
        for transform_job in sagemaker_client.list_transform_jobs()["TransformJobSummaries"]
    ]
def test_load_model_succeeds_when_data_is_model_file_instead_of_directory(
    module_scoped_subclassed_model, model_path, data
):
    """
    This test verifies that PyTorch models saved in older versions of MLflow are loaded
    successfully by ``mlflow.pytorch.load_model``. The ``data`` path associated with these
    older models is a serialized PyTorch model file, as opposed to the current format:
    a directory containing a serialized model file and pickle module information.
    """
    artifact_path = "pytorch_model"
    with mlflow.start_run():
        mlflow.pytorch.log_model(
            artifact_path=artifact_path,
            pytorch_model=module_scoped_subclassed_model,
            conda_env=None,
        )
        model_path = _download_artifact_from_uri(
            "runs:/{run_id}/{artifact_path}".format(
                run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
            )
        )

    model_conf_path = os.path.join(model_path, "MLmodel")
    model_conf = Model.load(model_conf_path)
    pyfunc_conf = model_conf.flavors.get(pyfunc.FLAVOR_NAME)
    assert pyfunc_conf is not None
    model_data_path = os.path.join(model_path, pyfunc_conf[pyfunc.DATA])
    assert os.path.exists(model_data_path)
    assert mlflow.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME in os.listdir(model_data_path)
    pyfunc_conf[pyfunc.DATA] = os.path.join(
        model_data_path, mlflow.pytorch._SERIALIZED_TORCH_MODEL_FILE_NAME
    )
    model_conf.save(model_conf_path)

    loaded_pyfunc = pyfunc.load_pyfunc(model_path)

    np.testing.assert_array_almost_equal(
        loaded_pyfunc.predict(data[0]),
        pd.DataFrame(_predict(model=module_scoped_subclassed_model, data=data)),
        decimal=4,
    )
def load_model(model_uri, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``

                      For more information about supported URI schemes, see the
                      `Artifacts Documentation <https://www.mlflow.org/docs/latest/tracking.html#
                      supported-artifact-stores>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is loaded from this
                       destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    >>> from mlflow import spark
    >>> model = mlflow.spark.load_model("spark-model")
    >>> # Prepare test documents, which are unlabeled (id, text) tuples.
    >>> test = spark.createDataFrame([
    ...     (4, "spark i j k"),
    ...     (5, "l m n"),
    ...     (6, "spark hadoop spark"),
    ...     (7, "apache hadoop")], ["id", "text"])
    >>> # Make predictions on test documents.
    >>> prediction = model.transform(test)
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    spark_model_artifacts_path = os.path.join(local_model_path, flavor_conf["model_data"])
    return _load_model(model_path=spark_model_artifacts_path, dfs_tmpdir=dfs_tmpdir)
def test_model_log():
    with TempDir(chdr=True) as tmp:
        experiment_id = mlflow.create_experiment("test")
        sig = ModelSignature(
            inputs=Schema([ColSpec("integer", "x"), ColSpec("integer", "y")]),
            outputs=Schema([ColSpec(name=None, type="double")]),
        )
        input_example = {"x": 1, "y": 2}
        with mlflow.start_run(experiment_id=experiment_id) as r:
            Model.log("some/path", TestFlavor, signature=sig, input_example=input_example)

        local_path = _download_artifact_from_uri(
            "runs:/{}/some/path".format(r.info.run_id), output_path=tmp.path("")
        )
        loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
        assert loaded_model.run_id == r.info.run_id
        assert loaded_model.artifact_path == "some/path"
        assert loaded_model.flavors == {
            "flavor1": {"a": 1, "b": 2},
            "flavor2": {"x": 1, "y": 2},
        }
        assert loaded_model.signature == sig
        path = os.path.join(local_path, loaded_model.saved_input_example_info["artifact_path"])
        x = _dataframe_from_json(path)
        assert x.to_dict(orient="records")[0] == input_example
def test_model_log(sklearn_logreg_model, model_path):
    with TempDir(chdr=True, remove_on_exit=True) as tmp:
        for should_start_run in [False, True]:
            try:
                if should_start_run:
                    mlflow.start_run()

                artifact_path = "linear"
                conda_env = os.path.join(tmp.path(), "conda_env.yaml")
                _mlflow_conda_env(conda_env, additional_pip_deps=["scikit-learn"])

                model_info = mlflow.sklearn.log_model(
                    sk_model=sklearn_logreg_model.model,
                    artifact_path=artifact_path,
                    conda_env=conda_env,
                )
                model_uri = "runs:/{run_id}/{artifact_path}".format(
                    run_id=mlflow.active_run().info.run_id, artifact_path=artifact_path
                )
                assert model_info.model_uri == model_uri

                reloaded_logreg_model = mlflow.sklearn.load_model(model_uri=model_uri)
                np.testing.assert_array_equal(
                    sklearn_logreg_model.model.predict(sklearn_logreg_model.inference_data),
                    reloaded_logreg_model.predict(sklearn_logreg_model.inference_data),
                )

                model_path = _download_artifact_from_uri(artifact_uri=model_uri)
                model_config = Model.load(os.path.join(model_path, "MLmodel"))
                assert pyfunc.FLAVOR_NAME in model_config.flavors
                assert pyfunc.ENV in model_config.flavors[pyfunc.FLAVOR_NAME]
                env_path = model_config.flavors[pyfunc.FLAVOR_NAME][pyfunc.ENV]
                assert os.path.exists(os.path.join(model_path, env_path))
            finally:
                mlflow.end_run()
def test_extra_files_log_model(create_extra_files, sequential_model):
    extra_files, contents_expected = create_extra_files
    with mlflow.start_run():
        mlflow.pytorch.log_model(
            pytorch_model=sequential_model, artifact_path="models", extra_files=extra_files
        )

        model_uri = "runs:/{run_id}/{model_path}".format(
            run_id=mlflow.active_run().info.run_id, model_path="models"
        )
        with TempDir(remove_on_exit=True) as tmp:
            model_path = _download_artifact_from_uri(model_uri, tmp.path())
            model_config_path = os.path.join(model_path, "MLmodel")
            model_config = Model.load(model_config_path)
            flavor_config = model_config.flavors["pytorch"]

            assert "extra_files" in flavor_config
            loaded_extra_files = flavor_config["extra_files"]

            for loaded_extra_file, content_expected in zip(loaded_extra_files, contents_expected):
                assert "path" in loaded_extra_file
                extra_file_path = os.path.join(model_path, loaded_extra_file["path"])
                with open(extra_file_path) as fp:
                    assert fp.read() == content_expected
def load_model(model_uri):
    """
    Load a scikit-learn model from a local file or a run.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.

    :return: A scikit-learn model.

    .. code-block:: python
        :caption: Example

        import mlflow.sklearn

        sk_model = mlflow.sklearn.load_model("runs:/96771d893a5e46159d9f3b49bf9013e2/sk_models")

        # use Pandas DataFrame to make predictions
        pandas_df = ...
        predictions = sk_model.predict(pandas_df)
    """
    local_model_path = _download_artifact_from_uri(artifact_uri=model_uri)
    flavor_conf = _get_flavor_configuration(model_path=local_model_path, flavor_name=FLAVOR_NAME)
    sklearn_model_artifacts_path = os.path.join(local_model_path, flavor_conf["pickled_model"])
    serialization_format = flavor_conf.get("serialization_format", SERIALIZATION_FORMAT_PICKLE)
    return _load_model_from_local_file(
        path=sklearn_model_artifacts_path, serialization_format=serialization_format
    )
def predict(
    self,
    model_uri,
    input_path,
    output_path,
    content_type,
    json_format,
):
    """
    Generate predictions using a generic python model saved with MLflow.
    Return the prediction results as a JSON.
    """
    local_path = _download_artifact_from_uri(model_uri)
    # NB: Absolute windows paths do not work with mlflow apis, use file uri to ensure
    # platform compatibility.
    local_uri = path_to_local_file_uri(local_path)
    if not self._no_conda and ENV in self._config:
        conda_env_path = os.path.join(local_path, self._config[ENV])
        command = (
            'python -c "from mlflow.pyfunc.scoring_server import _predict; _predict('
            "model_uri={model_uri}, "
            "input_path={input_path}, "
            "output_path={output_path}, "
            "content_type={content_type}, "
            'json_format={json_format})"'
        ).format(
            model_uri=repr(local_uri),
            input_path=repr(input_path),
            output_path=repr(output_path),
            content_type=repr(content_type),
            json_format=repr(json_format),
        )
        return _execute_in_conda_env(conda_env_path, command, self._install_mlflow)
    else:
        scoring_server._predict(local_uri, input_path, output_path, content_type, json_format)
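# CLI counterpart of the predict method above. Option names follow
# `mlflow models predict --help` from the same MLflow era; treat the
# `--json-format` flag in particular as an assumption:
#
#   mlflow models predict -m runs:/<run_id>/model \
#       -i input.json -t json --json-format split -o output.json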