def _log_posttraining_metadata(estimator, spark_model, params):
    if _is_parameter_search_estimator(estimator):
        try:
            # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
            # information is consistent with the parent run
            child_tags = context_registry.resolve_tags()
            child_tags.update({MLFLOW_AUTOLOGGING: AUTOLOGGING_INTEGRATION_NAME})
            _create_child_runs_for_parameter_search(
                parent_estimator=estimator,
                parent_model=spark_model,
                parent_run=mlflow.active_run(),
                child_tags=child_tags,
            )
        except Exception:
            import traceback

            msg = (
                "Encountered exception during creation of child runs for parameter search."
                " Child runs may be missing. Exception: {}".format(traceback.format_exc())
            )
            _logger.warning(msg)

        estimator_param_maps = _get_tuning_param_maps(
            estimator, estimator._autologging_metadata.uid_to_indexed_name_map
        )

        metrics_dict, best_index = _get_param_search_metrics_and_best_index(
            estimator, spark_model
        )
        _log_parameter_search_results_as_artifact(
            estimator_param_maps, metrics_dict, mlflow.active_run().info.run_id
        )

        # Log best_param_map as a JSON artifact
        best_param_map = estimator_param_maps[best_index]
        mlflow.log_dict(best_param_map, artifact_file="best_parameters.json")

        # Log best_param_map as autologging parameters as well
        _log_estimator_params(
            {
                f"best_{param_name}": param_value
                for param_name, param_value in best_param_map.items()
            }
        )

    # `log_models` is a closure variable captured from the enclosing autolog() setup.
    if log_models:
        if _should_log_model(spark_model):
            # TODO: support model signature
            mlflow.spark.log_model(
                spark_model,
                artifact_path="model",
            )
            if _is_parameter_search_model(spark_model):
                mlflow.spark.log_model(
                    spark_model.bestModel,
                    artifact_path="best_model",
                )
        else:
            _logger.warning(_get_warning_msg_for_skip_log_model(spark_model))
def experiment(self) -> MlflowClient:
    if self._experiment_id is None:
        expt = self._mlflow_client.get_experiment_by_name(self._experiment_name)
        if expt is not None:
            self._experiment_id = expt.experiment_id
        else:
            if self._run_id is not None:
                raise ValueError(f'Experiment with name {self._experiment_name} not found')
            logger.warning(f'Experiment with name {self._experiment_name} not found. Creating it.')
            self._experiment_id = self._mlflow_client.create_experiment(
                name=self._experiment_name)

    if self._run_id is None:
        run = self._mlflow_client.create_run(
            experiment_id=self._experiment_id, tags=resolve_tags(self.tags))
        self._run_id = run.info.run_id

    return self._mlflow_client
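# The method above lazily resolves the experiment by name, creates it when
# missing, and opens a run on first access. A minimal sketch of the same
# bootstrap written against a plain MlflowClient; get_or_create_run is a
# hypothetical helper, not part of the class above:
from mlflow.tracking import MlflowClient
from mlflow.tracking.context.registry import resolve_tags


def get_or_create_run(client: MlflowClient, experiment_name: str, tags: dict) -> str:
    expt = client.get_experiment_by_name(experiment_name)
    # Create the experiment on first use, mirroring the warn-and-create branch above.
    experiment_id = expt.experiment_id if expt else client.create_experiment(name=experiment_name)
    run = client.create_run(experiment_id=experiment_id, tags=resolve_tags(tags))
    return run.info.run_id


run_id = get_or_create_run(MlflowClient(), "demo-experiment", {"team": "mlops"})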
def test_resolve_tags_no_arg(mock_run_context_providers):
    assert resolve_tags() == {
        "one": "override",
        "two": "two-val",
        "three": "three-val",
        "new": "new-val",
    }
def log_model(self, model, artifact_path: str):
    mlflow.set_tracking_uri(self._tracking_uri)
    mlflow.set_experiment(self._experiment_name)
    with mlflow.start_run(run_id=self.run_id, tags=resolve_tags(self.tags)):
        mlflow.sklearn.log_model(model, artifact_path)
def test_resolve_tags(mock_run_context_providers):
    tags_arg = {"two": "arg-override", "arg": "arg-val"}
    assert resolve_tags(tags_arg) == {
        "one": "override",
        "two": "arg-override",
        "three": "three-val",
        "new": "new-val",
        "arg": "arg-val",
    }
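# Taken together, the two tests above pin down resolve_tags' merge semantics:
# every registered context provider that reports in_context() contributes its
# tags, later providers override earlier ones on key collisions (hence
# "one": "override"), and caller-supplied tags override everything. A minimal
# sketch of that merge, assuming `providers` follows the RunContextProvider
# interface (in_context() and tags()):
def resolve_tags_sketch(providers, tags=None):
    all_tags = {}
    for provider in providers:
        # Only providers active in the current environment contribute tags.
        if provider.in_context():
            all_tags.update(provider.tags())
    # Caller-supplied tags take final precedence, as test_resolve_tags shows
    # with "two" being replaced by "arg-override".
    all_tags.update(tags or {})
    return all_tags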
def test_run_context_provider_registry_with_installed_plugin(tmp_wkdir):
    """This test requires the package in tests/resources/mlflow-test-plugin to be installed"""
    reload(mlflow.tracking.context.registry)
    from mlflow_test_plugin import PluginRunContextProvider

    assert PluginRunContextProvider in _currently_registered_run_context_provider_classes()

    # The test plugin's context provider always returns False from in_context
    # to avoid polluting tags in developers' environments. The following mock overrides this to
    # perform the integration test.
    with mock.patch.object(PluginRunContextProvider, "in_context", return_value=True):
        assert resolve_tags()["test"] == "tag"
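# For reference, a context provider plugin like the one this test installs is a
# RunContextProvider subclass exposed under the "mlflow.run_context_provider"
# entry-point group. A minimal sketch matching the tag the test asserts on
# (the setup.py wiring shown in the trailing comment is an assumption):
from mlflow.tracking.context.abstract_context import RunContextProvider


class SketchRunContextProvider(RunContextProvider):
    def in_context(self):
        # False by default so the plugin does not pollute tags in developers'
        # environments; the test above patches this to True.
        return False

    def tags(self):
        return {"test": "tag"}


# setup.py (assumed):
# entry_points={
#     "mlflow.run_context_provider": [
#         "unused=mlflow_test_plugin:PluginRunContextProvider",
#     ],
# }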
def _log_posttraining_metadata(estimator, *args, **kwargs):
    """
    Records metadata for a scikit-learn estimator after training has completed.
    This is intended to be invoked within a patched scikit-learn training routine
    (e.g., `fit()`, `fit_transform()`, ...) and assumes the existence of an active
    MLflow run that can be referenced via the fluent Tracking API.

    :param estimator: The scikit-learn estimator for which to log metadata.
    :param args: The arguments passed to the scikit-learn training routine
                 (e.g., `fit()`, `fit_transform()`, ...).
    :param kwargs: The keyword arguments passed to the scikit-learn training routine.
    """
    if hasattr(estimator, "score"):
        try:
            score_args = _get_args_for_score(estimator.score, estimator.fit, args, kwargs)
            training_score = estimator.score(*score_args)
        except Exception as e:
            msg = (
                estimator.score.__qualname__
                + " failed. The 'training_score' metric will not be recorded. Scoring error: "
                + str(e)
            )
            _logger.warning(msg)
        else:
            try_mlflow_log(mlflow.log_metric, "training_score", training_score)

    # log common metrics and artifacts for estimators (classifier, regressor)
    _log_specialized_estimator_content(estimator, mlflow.active_run().info.run_id, args, kwargs)

    def get_input_example():
        # Fetch an input example using the first several rows of the array-like
        # training data supplied to the training routine (e.g., `fit()`)
        fit_arg_names = _get_arg_names(estimator.fit)
        X_var_name, y_var_name = fit_arg_names[:2]
        input_example = _get_Xy(args, kwargs, X_var_name, y_var_name)[0][:INPUT_EXAMPLE_SAMPLE_ROWS]
        return input_example

    def infer_model_signature(input_example):
        if not hasattr(estimator, "predict"):
            raise Exception(
                "the trained model does not specify a `predict` function, "
                + "which is required in order to infer the signature"
            )
        return infer_signature(input_example, estimator.predict(input_example))

    # `log_models`, `log_input_examples`, and `log_model_signatures` are closure
    # variables captured from the enclosing autolog() setup.
    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )
        try_mlflow_log(
            log_model,
            estimator,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    if _is_parameter_search_estimator(estimator):
        if hasattr(estimator, "best_estimator_") and log_models:
            try_mlflow_log(
                log_model,
                estimator.best_estimator_,
                artifact_path="best_estimator",
                signature=signature,
                input_example=input_example,
            )

        if hasattr(estimator, "best_score_"):
            try_mlflow_log(mlflow.log_metric, "best_cv_score", estimator.best_score_)

        if hasattr(estimator, "best_params_"):
            best_params = {
                "best_{param_name}".format(param_name=param_name): param_value
                for param_name, param_value in estimator.best_params_.items()
            }
            try_mlflow_log(mlflow.log_params, best_params)

        if hasattr(estimator, "cv_results_"):
            try:
                # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
                # information is consistent with the parent run
                child_tags = context_registry.resolve_tags()
                child_tags.update({MLFLOW_AUTOLOGGING: FLAVOR_NAME})
                _create_child_runs_for_parameter_search(
                    cv_estimator=estimator,
                    parent_run=mlflow.active_run(),
                    child_tags=child_tags,
                )
            except Exception as e:
                msg = (
                    "Encountered exception during creation of child runs for parameter search."
                    " Child runs may be missing. Exception: {}".format(str(e))
                )
                _logger.warning(msg)

            try:
                cv_results_df = pd.DataFrame.from_dict(estimator.cv_results_)
                _log_parameter_search_results_as_artifact(
                    cv_results_df, mlflow.active_run().info.run_id
                )
            except Exception as e:
                msg = (
                    "Failed to log parameter search results as an artifact."
                    " Exception: {}".format(str(e))
                )
                _logger.warning(msg)
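# To exercise the code path above end to end, enable scikit-learn autologging and
# fit a parameter search estimator: the cv_results_ branch then creates one child
# run per parameter setting. A minimal usage sketch (not part of the code above):
import mlflow
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

mlflow.sklearn.autolog()

X, y = load_iris(return_X_y=True)
search = GridSearchCV(SVC(), param_grid={"C": [0.1, 1.0], "kernel": ["linear", "rbf"]})

with mlflow.start_run() as run:
    # The patched fit() invokes _log_posttraining_metadata: best_cv_score and
    # best_* params land on the parent run, plus one child run per cv_results_ row.
    search.fit(X, y)

print("parent run_id:", run.info.run_id)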
def start_run(run_id=None, experiment_id=None, run_name=None, nested=False):
    """
    Start a new MLflow run, setting it as the active run under which metrics and parameters
    will be logged. The return value can be used as a context manager within a ``with`` block;
    otherwise, you must call ``end_run()`` to terminate the current run.

    If you pass a ``run_id`` or the ``MLFLOW_RUN_ID`` environment variable is set,
    ``start_run`` attempts to resume a run with the specified run ID and
    other parameters are ignored. ``run_id`` takes precedence over ``MLFLOW_RUN_ID``.

    MLflow sets a variety of default tags on the run, as defined in
    :ref:`MLflow system tags <system_tags>`.

    :param run_id: If specified, get the run with the specified UUID and log parameters
                   and metrics under that run. The run's end time is unset and its status
                   is set to running, but the run's other attributes (``source_version``,
                   ``source_type``, etc.) are not changed.
    :param experiment_id: ID of the experiment under which to create the current run (applicable
                          only when ``run_id`` is not specified). If ``experiment_id`` argument
                          is unspecified, will look for valid experiment in the following order:
                          activated using ``set_experiment``, ``MLFLOW_EXPERIMENT_NAME``
                          environment variable, ``MLFLOW_EXPERIMENT_ID`` environment variable,
                          or the default experiment as defined by the tracking server.
    :param run_name: Name of new run (stored as a ``mlflow.runName`` tag).
                     Used only when ``run_id`` is unspecified.
    :param nested: Controls whether run is nested in parent run. ``True`` creates a nested run.
    :return: :py:class:`mlflow.ActiveRun` object that acts as a context manager wrapping
             the run's state.
    """
    global _active_run_stack
    # back compat for int experiment_id
    experiment_id = str(experiment_id) if isinstance(experiment_id, int) else experiment_id
    if len(_active_run_stack) > 0 and not nested:
        raise Exception(
            ("Run with UUID {} is already active. To start a nested "
             + "run, call start_run with nested=True").format(_active_run_stack[0].info.run_id)
        )
    if run_id:
        existing_run_id = run_id
    elif _RUN_ID_ENV_VAR in os.environ:
        existing_run_id = os.environ[_RUN_ID_ENV_VAR]
        del os.environ[_RUN_ID_ENV_VAR]
    else:
        existing_run_id = None
    if existing_run_id:
        _validate_run_id(existing_run_id)
        active_run_obj = MlflowClient().get_run(existing_run_id)
        if active_run_obj.info.lifecycle_stage == LifecycleStage.DELETED:
            raise MlflowException(
                "Cannot start run with ID {} because it is in the "
                "deleted state.".format(existing_run_id)
            )
    else:
        if len(_active_run_stack) > 0:
            parent_run_id = _active_run_stack[-1].info.run_id
        else:
            parent_run_id = None
        exp_id_for_run = experiment_id if experiment_id is not None else _get_experiment_id()
        user_specified_tags = {}
        if parent_run_id is not None:
            user_specified_tags[MLFLOW_PARENT_RUN_ID] = parent_run_id
        if run_name is not None:
            user_specified_tags[MLFLOW_RUN_NAME] = run_name
        tags = context_registry.resolve_tags(user_specified_tags)
        active_run_obj = MlflowClient().create_run(experiment_id=exp_id_for_run, tags=tags)
    _active_run_stack.append(ActiveRun(active_run_obj))
    return _active_run_stack[-1]
def _log_posttraining_metadata(estimator, *args, **kwargs):
    """
    Records metadata for a scikit-learn estimator after training has completed.
    This is intended to be invoked within a patched scikit-learn training routine
    (e.g., `fit()`, `fit_transform()`, ...) and assumes the existence of an active
    MLflow run that can be referenced via the fluent Tracking API.

    :param estimator: The scikit-learn estimator for which to log metadata.
    :param args: The arguments passed to the scikit-learn training routine
                 (e.g., `fit()`, `fit_transform()`, ...).
    :param kwargs: The keyword arguments passed to the scikit-learn training routine.
    """
    if hasattr(estimator, "score"):
        try:
            score_args = _get_args_for_score(estimator.score, estimator.fit, args, kwargs)
            training_score = estimator.score(*score_args)
        except Exception as e:  # pylint: disable=broad-except
            msg = (
                estimator.score.__qualname__
                + " failed. The 'training_score' metric will not be recorded. Scoring error: "
                + str(e)
            )
            _logger.warning(msg)
        else:
            try_mlflow_log(mlflow.log_metric, "training_score", training_score)

    # log common metrics and artifacts for estimators (classifier, regressor)
    _log_specialized_estimator_content(estimator, mlflow.active_run().info.run_id, args, kwargs)

    input_example = None
    signature = None
    if hasattr(estimator, "predict"):
        try:
            # Fetch an input example using the first several rows of the array-like
            # training data supplied to the training routine (e.g., `fit()`)
            SAMPLE_ROWS = 5
            fit_arg_names = _get_arg_names(estimator.fit)
            X_var_name, y_var_name = fit_arg_names[:2]
            input_example = _get_Xy(args, kwargs, X_var_name, y_var_name)[0][:SAMPLE_ROWS]
            model_output = estimator.predict(input_example)
            signature = infer_signature(input_example, model_output)
        except Exception as e:  # pylint: disable=broad-except
            input_example = None
            msg = "Failed to infer an input example and model signature: " + str(e)
            _logger.warning(msg)

    try_mlflow_log(
        log_model,
        estimator,
        artifact_path="model",
        signature=signature,
        input_example=input_example,
    )

    if _is_parameter_search_estimator(estimator):
        if hasattr(estimator, "best_estimator_"):
            try_mlflow_log(
                log_model,
                estimator.best_estimator_,
                artifact_path="best_estimator",
                signature=signature,
                input_example=input_example,
            )

        if hasattr(estimator, "best_params_"):
            best_params = {
                "best_{param_name}".format(param_name=param_name): param_value
                for param_name, param_value in estimator.best_params_.items()
            }
            try_mlflow_log(mlflow.log_params, best_params)

        if hasattr(estimator, "cv_results_"):
            try:
                # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
                # information is consistent with the parent run
                environment_tags = context_registry.resolve_tags()
                _create_child_runs_for_parameter_search(
                    cv_estimator=estimator,
                    parent_run=mlflow.active_run(),
                    child_tags=environment_tags,
                )
            except Exception as e:  # pylint: disable=broad-except
                msg = (
                    "Encountered exception during creation of child runs for parameter search."
                    " Child runs may be missing. Exception: {}".format(str(e))
                )
                _logger.warning(msg)

            try:
                cv_results_df = pd.DataFrame.from_dict(estimator.cv_results_)
                _log_parameter_search_results_as_artifact(
                    cv_results_df, mlflow.active_run().info.run_id
                )
            except Exception as e:  # pylint: disable=broad-except
                msg = (
                    "Failed to log parameter search results as an artifact."
                    " Exception: {}".format(str(e))
                )
                _logger.warning(msg)
def start_run(run_id=None, experiment_id=None, run_name=None, nested=False, tags=None):
    """
    Start a new MLflow run, setting it as the active run under which metrics and parameters
    will be logged. The return value can be used as a context manager within a ``with`` block;
    otherwise, you must call ``end_run()`` to terminate the current run.

    If you pass a ``run_id`` or the ``MLFLOW_RUN_ID`` environment variable is set,
    ``start_run`` attempts to resume a run with the specified run ID and
    other parameters are ignored. ``run_id`` takes precedence over ``MLFLOW_RUN_ID``.

    If resuming an existing run, the run status is set to ``RunStatus.RUNNING``.

    MLflow sets a variety of default tags on the run, as defined in
    :ref:`MLflow system tags <system_tags>`.

    :param run_id: If specified, get the run with the specified UUID and log parameters
                   and metrics under that run. The run's end time is unset and its status
                   is set to running, but the run's other attributes (``source_version``,
                   ``source_type``, etc.) are not changed.
    :param experiment_id: ID of the experiment under which to create the current run (applicable
                          only when ``run_id`` is not specified). If ``experiment_id`` argument
                          is unspecified, will look for valid experiment in the following order:
                          activated using ``set_experiment``, ``MLFLOW_EXPERIMENT_NAME``
                          environment variable, ``MLFLOW_EXPERIMENT_ID`` environment variable,
                          or the default experiment as defined by the tracking server.
    :param run_name: Name of new run (stored as a ``mlflow.runName`` tag).
                     Used only when ``run_id`` is unspecified.
    :param nested: Controls whether run is nested in parent run. ``True`` creates a nested run.
    :param tags: An optional dictionary of string keys and values to set as tags on the new run.
    :return: :py:class:`mlflow.ActiveRun` object that acts as a context manager wrapping
             the run's state.

    .. code-block:: python
        :caption: Example

        import mlflow

        # Create nested runs
        with mlflow.start_run(run_name='PARENT_RUN') as parent_run:
            mlflow.log_param("parent", "yes")
            with mlflow.start_run(run_name='CHILD_RUN', nested=True) as child_run:
                mlflow.log_param("child", "yes")

        print("parent run_id: {}".format(parent_run.info.run_id))
        print("child run_id : {}".format(child_run.info.run_id))
        print("--")

        # Search all child runs with a parent id
        query = "tags.mlflow.parentRunId = '{}'".format(parent_run.info.run_id)
        results = mlflow.search_runs(filter_string=query)
        print(results[["run_id", "params.child", "tags.mlflow.runName"]])

    .. code-block:: text
        :caption: Output

        parent run_id: 5ec0e7ae18f54c2694ffb48c2fccf25c
        child run_id : 78b3b0d264b44cd29e8dc389749bb4be
        --
                                     run_id params.child tags.mlflow.runName
        0  78b3b0d264b44cd29e8dc389749bb4be          yes           CHILD_RUN
    """
    global _active_run_stack
    # back compat for int experiment_id
    experiment_id = str(experiment_id) if isinstance(experiment_id, int) else experiment_id
    if len(_active_run_stack) > 0 and not nested:
        raise Exception(
            ("Run with UUID {} is already active. To start a new run, first end the "
             + "current run with mlflow.end_run(). To start a nested "
             + "run, call start_run with nested=True").format(_active_run_stack[0].info.run_id)
        )
    if run_id:
        existing_run_id = run_id
    elif _RUN_ID_ENV_VAR in os.environ:
        existing_run_id = os.environ[_RUN_ID_ENV_VAR]
        del os.environ[_RUN_ID_ENV_VAR]
    else:
        existing_run_id = None
    if existing_run_id:
        _validate_run_id(existing_run_id)
        active_run_obj = MlflowClient().get_run(existing_run_id)
        # Check to see if experiment_id from environment matches experiment_id from set_experiment()
        if (_active_experiment_id is not None
                and _active_experiment_id != active_run_obj.info.experiment_id):
            raise MlflowException(
                "Cannot start run with ID {} because active run ID "
                "does not match environment run ID. Make sure --experiment-name "
                "or --experiment-id matches experiment set with "
                "set_experiment(), or just use command-line "
                "arguments".format(existing_run_id)
            )
        # Check to see if current run isn't deleted
        if active_run_obj.info.lifecycle_stage == LifecycleStage.DELETED:
            raise MlflowException(
                "Cannot start run with ID {} because it is in the "
                "deleted state.".format(existing_run_id)
            )
        # Use previous end_time because a value is required for update_run_info
        end_time = active_run_obj.info.end_time
        _get_store().update_run_info(
            existing_run_id, run_status=RunStatus.RUNNING, end_time=end_time
        )
        active_run_obj = MlflowClient().get_run(existing_run_id)
    else:
        if len(_active_run_stack) > 0:
            parent_run_id = _active_run_stack[-1].info.run_id
        else:
            parent_run_id = None
        exp_id_for_run = experiment_id if experiment_id is not None else _get_experiment_id()
        user_specified_tags = tags or {}
        if parent_run_id is not None:
            user_specified_tags[MLFLOW_PARENT_RUN_ID] = parent_run_id
        if run_name is not None:
            user_specified_tags[MLFLOW_RUN_NAME] = run_name
        tags = context_registry.resolve_tags(user_specified_tags)
        active_run_obj = MlflowClient().create_run(experiment_id=exp_id_for_run, tags=tags)
    _active_run_stack.append(ActiveRun(active_run_obj))
    return _active_run_stack[-1]
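# The resume branch above can be exercised directly: passing run_id (or setting
# MLFLOW_RUN_ID) flips an existing run back to RunStatus.RUNNING instead of
# creating a new one. A short usage sketch:
import os
import mlflow

with mlflow.start_run() as run:
    mlflow.log_param("stage", "first-pass")
original_run_id = run.info.run_id

# Resume explicitly by ID; other start_run arguments are ignored in this branch.
with mlflow.start_run(run_id=original_run_id):
    mlflow.log_metric("resumed", 1)

# Equivalently, MLFLOW_RUN_ID is consulted (and consumed) when no run_id is given.
os.environ["MLFLOW_RUN_ID"] = original_run_id
with mlflow.start_run():
    mlflow.log_metric("resumed_via_env", 1)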
def _log_posttraining_metadata(autologging_client, estimator, *args, **kwargs):
    """
    Records metadata for a scikit-learn estimator after training has completed.
    This is intended to be invoked within a patched scikit-learn training routine
    (e.g., `fit()`, `fit_transform()`, ...) and assumes the existence of an active
    MLflow run that can be referenced via the fluent Tracking API.

    :param autologging_client: An instance of `MlflowAutologgingQueueingClient` used for
                               efficiently logging run data to MLflow Tracking.
    :param estimator: The scikit-learn estimator for which to log metadata.
    :param args: The arguments passed to the scikit-learn training routine
                 (e.g., `fit()`, `fit_transform()`, ...).
    :param kwargs: The keyword arguments passed to the scikit-learn training routine.
    """

    def infer_model_signature(input_example):
        if not hasattr(estimator, "predict"):
            raise Exception(
                "the trained model does not specify a `predict` function, "
                + "which is required in order to infer the signature"
            )
        return infer_signature(input_example, estimator.predict(input_example))

    (X, y_true, sample_weight) = _get_args_for_metrics(estimator.fit, args, kwargs)

    # log common metrics and artifacts for estimators (classifier, regressor)
    logged_metrics = _log_estimator_content(
        autologging_client=autologging_client,
        estimator=estimator,
        prefix=_TRAINING_PREFIX,
        run_id=mlflow.active_run().info.run_id,
        X=X,
        y_true=y_true,
        sample_weight=sample_weight,
    )
    if y_true is None and not logged_metrics:
        _logger.warning(
            "Training metrics will not be recorded because training labels were not specified."
            " To automatically record training metrics, provide training labels as inputs to"
            " the model training function."
        )

    def get_input_example():
        # Fetch an input example using the first several rows of the array-like
        # training data supplied to the training routine (e.g., `fit()`)
        input_example = X[:INPUT_EXAMPLE_SAMPLE_ROWS]
        return input_example

    # `log_models`, `log_input_examples`, `log_model_signatures`, and `max_tuning_runs`
    # are closure variables captured from the enclosing autolog() setup.
    if log_models:
        # Will only resolve `input_example` and `signature` if `log_models` is `True`.
        input_example, signature = resolve_input_example_and_signature(
            get_input_example,
            infer_model_signature,
            log_input_examples,
            log_model_signatures,
            _logger,
        )
        log_model(
            estimator,
            artifact_path="model",
            signature=signature,
            input_example=input_example,
        )

    if _is_parameter_search_estimator(estimator):
        if hasattr(estimator, "best_estimator_") and log_models:
            log_model(
                estimator.best_estimator_,
                artifact_path="best_estimator",
                signature=signature,
                input_example=input_example,
            )

        if hasattr(estimator, "best_score_"):
            autologging_client.log_metrics(
                run_id=mlflow.active_run().info.run_id,
                metrics={"best_cv_score": estimator.best_score_},
            )

        if hasattr(estimator, "best_params_"):
            best_params = {
                "best_{param_name}".format(param_name=param_name): param_value
                for param_name, param_value in estimator.best_params_.items()
            }
            autologging_client.log_params(
                run_id=mlflow.active_run().info.run_id,
                params=best_params,
            )

        if hasattr(estimator, "cv_results_"):
            try:
                # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
                # information is consistent with the parent run
                child_tags = context_registry.resolve_tags()
                child_tags.update({MLFLOW_AUTOLOGGING: FLAVOR_NAME})
                _create_child_runs_for_parameter_search(
                    autologging_client=autologging_client,
                    cv_estimator=estimator,
                    parent_run=mlflow.active_run(),
                    max_tuning_runs=max_tuning_runs,
                    child_tags=child_tags,
                )
            except Exception as e:
                msg = (
                    "Encountered exception during creation of child runs for parameter search."
                    " Child runs may be missing. Exception: {}".format(str(e))
                )
                _logger.warning(msg)

            try:
                cv_results_df = pd.DataFrame.from_dict(estimator.cv_results_)
                _log_parameter_search_results_as_artifact(
                    cv_results_df, mlflow.active_run().info.run_id
                )
            except Exception as e:
                msg = (
                    "Failed to log parameter search results as an artifact."
                    " Exception: {}".format(str(e))
                )
                _logger.warning(msg)
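# This variant batches params/metrics through `autologging_client` instead of
# issuing one fluent-API call per value. A rough sketch of standalone usage,
# assuming the client lives at mlflow.utils.autologging_utils.client (an
# internal module, so the path and API may differ across MLflow versions):
import mlflow
from mlflow.utils.autologging_utils.client import MlflowAutologgingQueueingClient

autologging_client = MlflowAutologgingQueueingClient()
with mlflow.start_run() as run:
    # Calls are queued locally...
    autologging_client.log_params(run_id=run.info.run_id, params={"best_C": 1.0})
    autologging_client.log_metrics(run_id=run.info.run_id, metrics={"best_cv_score": 0.97})
    # ...and submitted in batches on flush().
    autologging_client.flush(synchronous=True)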
def _log_posttraining_metadata(estimator, spark_model, params, input_df):
    if _is_parameter_search_estimator(estimator):
        try:
            # Fetch environment-specific tags (e.g., user and source) to ensure that lineage
            # information is consistent with the parent run
            child_tags = context_registry.resolve_tags()
            child_tags.update({MLFLOW_AUTOLOGGING: AUTOLOGGING_INTEGRATION_NAME})
            _create_child_runs_for_parameter_search(
                parent_estimator=estimator,
                parent_model=spark_model,
                parent_run=mlflow.active_run(),
                child_tags=child_tags,
            )
        except Exception:
            import traceback

            msg = (
                "Encountered exception during creation of child runs for parameter search."
                " Child runs may be missing. Exception: {}".format(traceback.format_exc())
            )
            _logger.warning(msg)

        estimator_param_maps = _get_tuning_param_maps(
            estimator, estimator._autologging_metadata.uid_to_indexed_name_map
        )

        metrics_dict, best_index = _get_param_search_metrics_and_best_index(
            estimator, spark_model
        )
        _log_parameter_search_results_as_artifact(
            estimator_param_maps, metrics_dict, mlflow.active_run().info.run_id
        )

        # Log best_param_map as a JSON artifact
        best_param_map = estimator_param_maps[best_index]
        mlflow.log_dict(best_param_map, artifact_file="best_parameters.json")

        # Log best_param_map as autologging parameters as well
        _log_estimator_params(
            {
                f"best_{param_name}": param_value
                for param_name, param_value in best_param_map.items()
            }
        )

    # `log_models`, `log_input_examples`, `log_model_signatures`, and
    # `registered_model_name` are closure variables captured from the enclosing
    # autolog() setup.
    if log_models:
        if _should_log_model(spark_model):
            from mlflow.models import infer_signature
            from mlflow.pyspark.ml._autolog import (
                cast_spark_df_with_vector_to_array,
                get_feature_cols,
            )
            from mlflow.spark import _find_and_set_features_col_as_vector_if_needed
            from pyspark.sql import SparkSession

            spark = SparkSession.builder.getOrCreate()

            def _get_input_example_as_pd_df():
                feature_cols = list(get_feature_cols(input_df.schema, spark_model))
                limited_input_df = input_df.select(feature_cols).limit(
                    INPUT_EXAMPLE_SAMPLE_ROWS
                )
                return cast_spark_df_with_vector_to_array(limited_input_df).toPandas()

            def _infer_model_signature(input_example_slice):
                input_slice_df = _find_and_set_features_col_as_vector_if_needed(
                    spark.createDataFrame(input_example_slice), spark_model
                )
                model_output = spark_model.transform(input_slice_df).drop(
                    *input_slice_df.columns
                )
                return infer_signature(input_example_slice, model_output.toPandas())

            input_example, signature = resolve_input_example_and_signature(
                _get_input_example_as_pd_df,
                _infer_model_signature,
                log_input_examples,
                log_model_signatures,
                _logger,
            )
            mlflow.spark.log_model(
                spark_model,
                artifact_path="model",
                registered_model_name=registered_model_name,
                input_example=input_example,
                signature=signature,
            )
            if _is_parameter_search_model(spark_model):
                mlflow.spark.log_model(
                    spark_model.bestModel,
                    artifact_path="best_model",
                )
        else:
            _logger.warning(_get_warning_msg_for_skip_log_model(spark_model))
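# As with the scikit-learn integration, this Spark code path is driven by
# autologging plus a tuning estimator: fitting a CrossValidator under
# mlflow.pyspark.ml.autolog() produces the child runs, the best_parameters.json
# artifact, and the model / best_model artifacts logged above. A minimal sketch:
import mlflow
import mlflow.pyspark.ml
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
mlflow.pyspark.ml.autolog()

df = spark.createDataFrame(
    [(Vectors.dense([0.0, 1.0]), 0.0), (Vectors.dense([1.0, 0.0]), 1.0)] * 10,
    ["features", "label"],
)
lr = LogisticRegression()
grid = ParamGridBuilder().addGrid(lr.regParam, [0.01, 0.1]).build()
cv = CrossValidator(
    estimator=lr,
    estimatorParamMaps=grid,
    evaluator=BinaryClassificationEvaluator(),
    numFolds=2,
)

with mlflow.start_run():
    cv_model = cv.fit(df)  # triggers the patched path shown above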