def _get_flavor_configuration_from_uri(model_uri, flavor_name):
    """
    Load the configuration of a single flavor from an MLflow model URI.

    Downloads the model's MLmodel file, parses it, and returns the entry for
    ``flavor_name``. An ``MlflowException`` with ``RESOURCE_DOES_NOT_EXIST`` is
    raised when the MLmodel file cannot be downloaded or when the model does
    not declare the requested flavor.

    :param model_uri: The path to the root directory of the MLflow model for which to load
                      the specified flavor configuration.
    :param flavor_name: The name of the flavor configuration to load.
    :return: The flavor configuration as a dictionary.
    """
    try:
        ml_model_file = _download_artifact_from_uri(
            artifact_uri=append_to_uri_path(model_uri, MLMODEL_FILE_NAME)
        )
    except Exception as ex:
        raise MlflowException(
            'Failed to download an "{model_file}" model file from "{model_uri}": {ex}'
            .format(model_file=MLMODEL_FILE_NAME, model_uri=model_uri, ex=ex),
            RESOURCE_DOES_NOT_EXIST,
        )
    flavors = Model.load(ml_model_file).flavors
    if flavor_name in flavors:
        return flavors[flavor_name]
    raise MlflowException(
        'Model does not have the "{flavor_name}" flavor'.format(flavor_name=flavor_name),
        RESOURCE_DOES_NOT_EXIST,
    )
def create_run(self, experiment_id, user_id, start_time, tags):
    """
    Create a new RUNNING run under the given (active) experiment and persist it.

    :param experiment_id: ID of the experiment the run belongs to; must be active.
    :param user_id: ID of the user creating the run.
    :param start_time: Run start timestamp.
    :param tags: Iterable of tag entities; duplicate keys collapse to the last value.
    :return: The created run converted to an MLflow ``Run`` entity.
    """
    with self.ManagedSessionMaker() as session:
        experiment = self.get_experiment(experiment_id)
        self._check_experiment_is_active(experiment)

        run_id = uuid.uuid4().hex
        artifact_location = append_to_uri_path(
            experiment.artifact_location, run_id,
            SqlAlchemyStore.ARTIFACTS_FOLDER_NAME)

        run = SqlRun(
            name="",
            artifact_uri=artifact_location,
            run_uuid=run_id,
            experiment_id=experiment_id,
            source_type=SourceType.to_string(SourceType.UNKNOWN),
            source_name="",
            entry_point_name="",
            user_id=user_id,
            status=RunStatus.to_string(RunStatus.RUNNING),
            start_time=start_time,
            end_time=None,
            source_version="",
            lifecycle_stage=LifecycleStage.ACTIVE,
        )
        # Deduplicate tag keys (last occurrence wins) before attaching.
        deduped_tags = {tag.key: tag.value for tag in tags}
        run.tags = [SqlTag(key=k, value=v) for k, v in deduped_tags.items()]

        self._save_to_db(objs=run, session=session)
        return run.to_mlflow_entity()
def _get_artifact_dir(self, experiment_id, run_uuid):
    """
    Return the artifact directory URI for a run inside the given experiment.

    :param experiment_id: ID of the experiment that owns the run.
    :param run_uuid: ID of the run; validated before any lookup.
    :return: URI of the run's artifacts folder under the experiment's artifact location.
    """
    _validate_run_id(run_uuid)
    experiment = self.get_experiment(experiment_id)
    artifacts_dir = append_to_uri_path(
        experiment.artifact_location,
        run_uuid,
        FileStore.ARTIFACTS_FOLDER_NAME,
    )
    return artifacts_dir
def get_artifact_uri(run_id, artifact_path=None):
    """
    Get the absolute URI of the specified artifact in the specified run. If `path` is not
    specified, the artifact root URI of the specified run will be returned; calls to
    ``log_artifact`` and ``log_artifacts`` write artifact(s) to subdirectories of the artifact
    root URI.

    :param run_id: The ID of the run for which to obtain an absolute artifact URI.
    :param artifact_path: The run-relative artifact path. For example,
                          ``path/to/artifact``. If unspecified, the artifact root URI for the
                          specified run will be returned.
    :return: An *absolute* URI referring to the specified artifact or the specified run's
             artifact root. For example, if an artifact path is provided and the specified
             run uses an S3-backed store, this may be a uri of the form
             ``s3://<bucket_name>/path/to/artifact/root/path/to/artifact``. If an artifact path
             is not provided and the specified run uses an S3-backed store, this may be a URI of
             the form ``s3://<bucket_name>/path/to/artifact/root``.
    """
    if not run_id:
        raise MlflowException(
            message="A run_id must be specified in order to obtain an artifact uri!",
            error_code=INVALID_PARAMETER_VALUE)
    store = _get_store()
    run = store.get_run(run_id)
    # Maybe move this method to RunsArtifactRepository so the circular dependency is clearer.
    # Guard against a "runs:/" artifact root, which would cause this resolution to recurse
    # forever. This used to be a bare `assert`, which is stripped under `python -O`; raise an
    # explicit exception instead so the guard always holds.
    if urllib.parse.urlparse(run.info.artifact_uri).scheme == "runs":
        raise MlflowException(
            message="The artifact root of run '{run_id}' is itself a 'runs:/' URI, which "
                    "cannot be resolved to an absolute artifact location.".format(
                        run_id=run_id),
            error_code=INVALID_PARAMETER_VALUE)
    if artifact_path is None:
        return run.info.artifact_uri
    return append_to_uri_path(run.info.artifact_uri, artifact_path)
def create_run(self, experiment_id: str, user_id: str, start_time: int,
               tags: List[RunTag]) -> Run:
    """
    Create and persist a new RUNNING run in the Elasticsearch backend.

    :param experiment_id: ID of the (active) experiment the run belongs to.
    :param user_id: ID of the user creating the run.
    :param start_time: Run start timestamp.
    :param tags: Run tags; duplicate keys collapse to the last value.
    :return: The created run converted to an MLflow ``Run`` entity.
    """
    run_id = uuid.uuid4().hex
    experiment = self._get_experiment(experiment_id)
    self._check_experiment_is_active(experiment)

    artifact_location = append_to_uri_path(
        experiment.artifact_location, run_id,
        ElasticsearchStore.ARTIFACTS_FOLDER_NAME)

    # Deduplicate tag keys (last occurrence wins) before building ElasticTag docs.
    deduped_tags = {tag.key: tag.value for tag in tags}
    run_tags = [ElasticTag(key=k, value=v) for k, v in deduped_tags.items()]

    run = ElasticRun(
        meta={'id': run_id},
        run_id=run_id,
        experiment_id=experiment_id,
        user_id=user_id,
        status=RunStatus.to_string(RunStatus.RUNNING),
        start_time=start_time,
        end_time=None,
        lifecycle_stage=LifecycleStage.ACTIVE,
        artifact_uri=artifact_location,
        tags=run_tags,
    )
    run.save()
    return run.to_mlflow_entity()
def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
    """
    Create an experiment directory and its meta file under the store root.

    :param name: Human-readable experiment name.
    :param experiment_id: ID under which the experiment directory is created.
    :param artifact_uri: Artifact root for the experiment; when falsy, a default
                         location under the store's artifact root is derived.
    :return: The ``experiment_id`` that was created.
    """
    if not artifact_uri:
        artifact_uri = append_to_uri_path(self.artifact_root_uri, str(experiment_id))
    self._check_root_dir()
    meta_dir = mkdir(self.root_directory, str(experiment_id))
    experiment = Experiment(experiment_id, name, artifact_uri, LifecycleStage.ACTIVE)
    experiment_dict = dict(experiment)
    # tags are added to the file system and are not written to this dict on write
    # As such, we should not include them in the meta file.
    del experiment_dict['tags']
    write_yaml(meta_dir, FileStore.META_DATA_FILE_NAME, experiment_dict)
    return experiment_id
def load_model(model_uri, dfs_tmpdir=None):
    """
    Load the Spark MLlib model from the path.

    :param model_uri: The location, in URI format, of the MLflow model, for example:

                      - ``/Users/me/path/to/local/model``
                      - ``relative/path/to/local/model``
                      - ``s3://my_bucket/path/to/model``
                      - ``runs:/<mlflow_run_id>/run-relative/path/to/model``
                      - ``models:/<model_name>/<model_version>``
                      - ``models:/<model_name>/<stage>``

                      For more information about supported URI schemes, see
                      `Referencing Artifacts <https://www.mlflow.org/docs/latest/concepts.html#
                      artifact-locations>`_.
    :param dfs_tmpdir: Temporary directory path on Distributed (Hadoop) File System (DFS) or local
                       filesystem if running in local mode. The model is loaded from this
                       destination. Defaults to ``/tmp/mlflow``.
    :return: pyspark.ml.pipeline.PipelineModel

    .. code-block:: python
        :caption: Example

        from mlflow import spark
        model = mlflow.spark.load_model("spark-model")
        # Prepare test documents, which are unlabeled (id, text) tuples.
        test = spark.createDataFrame([
            (4, "spark i j k"),
            (5, "l m n"),
            (6, "spark hadoop spark"),
            (7, "apache hadoop")], ["id", "text"])
        # Make predictions on test documents
        prediction = model.transform(test)
    """
    # Resolve "runs:/" and "models:/" URIs to the underlying artifact URI.
    resolver = None
    if RunsArtifactRepository.is_runs_uri(model_uri):
        resolver = RunsArtifactRepository
    elif ModelsArtifactRepository.is_models_uri(model_uri):
        resolver = ModelsArtifactRepository
    if resolver is not None:
        runs_uri = model_uri
        model_uri = resolver.get_underlying_uri(model_uri)
        _logger.info("'%s' resolved as '%s'", runs_uri, model_uri)

    flavor_conf = _get_flavor_configuration_from_uri(model_uri, FLAVOR_NAME)
    model_uri = append_to_uri_path(model_uri, flavor_conf["model_data"])
    local_model_path = _download_artifact_from_uri(model_uri)
    _add_code_from_conf_to_system_path(local_model_path, flavor_conf)
    return _load_model(model_uri=model_uri, dfs_tmpdir_base=dfs_tmpdir)
def _get_flavor_backend(model_uri, **kwargs):
    """
    Select the flavor backend for the model at ``model_uri``.

    Resolves "models:/" URIs to their underlying artifact URI, downloads the
    model's MLmodel file into a temporary directory, and asks
    ``get_flavor_backend`` to choose a backend for the parsed model.

    :param model_uri: URI of the MLflow model.
    :param kwargs: Extra options forwarded to ``get_flavor_backend``.
    :return: The selected flavor backend.
    :raises Exception: If no suitable flavor backend exists for the model.
    """
    with TempDir() as tmp:
        if ModelsArtifactRepository.is_models_uri(model_uri):
            resolved_uri = ModelsArtifactRepository.get_underlying_uri(model_uri)
        else:
            resolved_uri = model_uri
        mlmodel_path = _download_artifact_from_uri(
            append_to_uri_path(resolved_uri, "MLmodel"),
            output_path=tmp.path())
        model = Model.load(mlmodel_path)
        flavor_name, flavor_backend = get_flavor_backend(model, **kwargs)
        if flavor_backend is None:
            raise Exception("No suitable flavor backend was found for the model.")
        _logger.info("Selected backend for flavor '%s'", flavor_name)
        return flavor_backend
def build_image_local_from_model_uri(self, model_uri, base_image,
                                     mlflow_home=None, **kwargs):
    """
    Build a PythonModel-backed service image from ``model_uri``.

    :param model_uri: Directory containing the pyfunc model filesystem.
        <"pyfunc-filename-system"
        https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#pyfunc-filename-system>_
    :param base_image: Base image from which the model image is built.
    :param mlflow_home: Local MLflow copy used to start the model service in the
                        container; when None, MLflow is installed from pip.
    :return: True when the image build succeeded, False otherwise.
    :raises TypeError: If no flavor backend suits the model.
    :raises AttributeError: If the selected backend cannot build images.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        if ModelsArtifactRepository.is_models_uri(model_uri):
            underlying_model_uri = ModelsArtifactRepository.get_underlying_uri(model_uri)
        else:
            underlying_model_uri = model_uri
        local_path = _download_artifact_from_uri(
            append_to_uri_path(underlying_model_uri, MLMODEL_FILE_NAME),
            output_path=tmp_dir)
        model_meta = Model.load(local_path)

        flavor_name, flavor_backend = get_flavor_backend(model_meta, **kwargs)
        if flavor_name is None:
            raise TypeError("no suitable backend was found for the model")
        if not flavor_backend.can_build_image():
            raise AttributeError(
                'flavor {} not support build image'.format(flavor_name))

        # Always install mlflow so it overrides the official mlflow package in the container.
        return_code = flavor_backend.build_image(
            model_uri,
            self.image_name,
            install_mlflow=True,
            mlflow_home=mlflow_home,
            base_image=base_image)
        return not return_code
def _create_experiment_with_id(self, name, experiment_id, artifact_uri):
    """
    Persist a new active experiment item in the DynamoDB experiment table.

    :param name: Human-readable experiment name.
    :param experiment_id: ID of the experiment to create.
    :param artifact_uri: Artifact root for the experiment; when falsy, a default
                         location under the store's artifact root is derived.
    :return: The ``experiment_id`` that was created.
    :raises MlflowException: If DynamoDB does not answer with HTTP 200.
    """
    if not artifact_uri:
        artifact_uri = append_to_uri_path(self.artifact_root_uri, str(experiment_id))

    table_name = "_".join([self.table_prefix, DynamodbStore.EXPERIMENT_TABLE])
    table = self._get_dynamodb_resource().Table(table_name)

    experiment = Experiment(
        experiment_id=experiment_id,
        name=name,
        artifact_location=artifact_uri,
        lifecycle_stage=LifecycleStage.ACTIVE,
    )
    response = table.put_item(
        Item=_entity_to_dict(experiment),
        ReturnConsumedCapacity="TOTAL",
    )
    if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
        raise MlflowException("DynamoDB connection error")
    return experiment_id
def _get_artifact_location(self, experiment_id):
    """
    Return the default artifact root URI for the given experiment.

    :param experiment_id: Experiment ID; coerced to ``str`` before joining.
    :return: URI formed by appending the experiment ID to the store's artifact root.
    """
    location = append_to_uri_path(self.artifact_root_uri, str(experiment_id))
    return location
def validate_append_to_uri_path_test_cases(cases):
    """
    Check ``append_to_uri_path`` against a table of expected results.

    For each ``(input_uri, input_path, expected_output_uri)`` triple, asserts
    that appending ``input_path`` in one call and as its two posixpath-split
    components both yield ``expected_output_uri``.

    :param cases: Iterable of (input_uri, input_path, expected_output_uri) triples.
    """
    for input_uri, input_path, expected_output_uri in cases:
        assert append_to_uri_path(input_uri, input_path) == expected_output_uri
        head, tail = posixpath.split(input_path)
        assert append_to_uri_path(input_uri, head, tail) == expected_output_uri
def log_explanation(predict_function, features, artifact_path=None):
    r"""
    Given a ``predict_function`` capable of computing ML model output on the provided
    ``features``, computes and logs explanations of an ML model's output. Explanations are
    logged as a directory of artifacts containing the following items generated by `SHAP`_
    (SHapley Additive exPlanations).

        - Base values
        - SHAP values (computed using `shap.KernelExplainer`_)
        - Summary bar plot (shows the average impact of each feature on model output)

    :param predict_function:
        A function to compute the output of a model (e.g. ``predict_proba`` method of
        scikit-learn classifiers). Must have the following signature:

        .. code-block:: python

            def predict_function(X) -> pred:
                ...

        - ``X``: An array-like object whose shape should be (# samples, # features).
        - ``pred``: An array-like object whose shape should be (# samples) for
          a regressor or (# classes, # samples) for a classifier. For a classifier,
          the values in ``pred`` should correspond to the predicted probability of each class.

        Acceptable array-like object types:

            - ``numpy.array``
            - ``pandas.DataFrame``
            - ``shap.common.DenseData``
            - ``scipy.sparse matrix``

    :param features:
        A matrix of features to compute SHAP values with. The provided features should
        have shape (# samples, # features), and can be either of the array-like object
        types listed above.

        .. note::
            Background data for `shap.KernelExplainer`_ is generated by subsampling ``features``
            with `shap.kmeans`_. The background data size is limited to 100 rows for performance
            reasons.

    :param artifact_path:
        The run-relative artifact path to which the explanation is saved.
        If unspecified, defaults to "model_explanations_shap".

    :return: Artifact URI of the logged explanations.

    .. _SHAP: https://github.com/slundberg/shap

    .. _shap.KernelExplainer: https://shap.readthedocs.io/en/latest/generated
        /shap.KernelExplainer.html#shap.KernelExplainer

    .. _shap.kmeans: https://github.com/slundberg/shap/blob/v0.36.0/shap/utils/_legacy.py#L9
    """
    import matplotlib.pyplot as plt
    import shap

    if artifact_path is None:
        artifact_path = _DEFAULT_ARTIFACT_PATH

    # SHAP internally calls predict_function; disable autologging so those calls
    # do not pollute the active run with spurious autologged content.
    with mlflow.utils.autologging_utils.disable_autologging():
        # Subsample the background data to keep KernelExplainer tractable.
        background_size = min(_MAXIMUM_BACKGROUND_DATA_SIZE, len(features))
        background_data = shap.kmeans(features, background_size)
        explainer = shap.KernelExplainer(predict_function, background_data)
        shap_values = explainer.shap_values(features)

        _log_numpy(explainer.expected_value, _BASE_VALUES_FILE_NAME, artifact_path)
        _log_numpy(shap_values, _SHAP_VALUES_FILE_NAME, artifact_path)

        shap.summary_plot(shap_values, features, plot_type="bar", show=False)
        fig = plt.gcf()
        fig.tight_layout()
        _log_matplotlib_figure(fig, _SUMMARY_BAR_PLOT_FILE_NAME, artifact_path)
        plt.close(fig)

    return append_to_uri_path(mlflow.active_run().info.artifact_uri, artifact_path)