def test_is_testing_respects_environment_variable():
    try:
        prev_env_var_value = os.environ.pop("MLFLOW_AUTOLOGGING_TESTING", None)
        assert not _is_testing()

        os.environ["MLFLOW_AUTOLOGGING_TESTING"] = "false"
        assert not _is_testing()

        os.environ["MLFLOW_AUTOLOGGING_TESTING"] = "true"
        assert _is_testing()
    finally:
        # Compare against None so that a preexisting empty-string value is
        # restored rather than deleted
        if prev_env_var_value is not None:
            os.environ["MLFLOW_AUTOLOGGING_TESTING"] = prev_env_var_value
        else:
            del os.environ["MLFLOW_AUTOLOGGING_TESTING"]

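# For reference, a minimal sketch of what `_is_testing` presumably does, based on the
# assertions in the test above: it reads the MLFLOW_AUTOLOGGING_TESTING environment
# variable (referred to via `_AUTOLOGGING_TEST_MODE_ENV_VAR` in the fixtures below) and
# treats it as a boolean flag. Illustrative assumption, not MLflow's actual implementation.
import os

def _is_testing_sketch():
    return os.environ.get("MLFLOW_AUTOLOGGING_TESTING", "false") == "true"
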
def test_exception_safe_class_exhibits_expected_behavior_in_standard_mode(baseclass, metaclass):
    assert not autologging_utils._is_testing()

    class NonThrowingClass(baseclass, metaclass=metaclass):
        def function(self):
            return 10

    assert NonThrowingClass().function() == 10

    exc_to_throw = Exception("function error")

    class ThrowingClass(baseclass, metaclass=metaclass):
        def function(self):
            raise exc_to_throw

    with mock.patch("mlflow.utils.autologging_utils._logger.warning") as logger_mock:
        ThrowingClass().function()

    assert logger_mock.call_count == 1
    message, formatting_arg = logger_mock.call_args[0]
    assert "unexpected error during autologging" in message
    assert formatting_arg == exc_to_throw

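# A minimal sketch of an exception-safe metaclass consistent with the behavior these
# tests exercise: every method is wrapped so that exceptions propagate in test mode
# but are downgraded to logged warnings otherwise. This is an illustrative assumption
# about how `ExceptionSafeClass` works, not MLflow's actual implementation.
import functools
import logging

from mlflow.utils.autologging_utils import _is_testing

_sketch_logger = logging.getLogger(__name__)

def _swallow_exceptions(func):
    @functools.wraps(func)
    def safe_func(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:  # pylint: disable=broad-except
            if _is_testing():
                raise
            _sketch_logger.warning("Encountered unexpected error during autologging: %s", e)

    return safe_func

class ExceptionSafeClassSketch(type):
    def __new__(mcs, name, bases, namespace):
        # Iterate over a copy because the namespace dict is mutated in the loop
        for attr, value in list(namespace.items()):
            if callable(value):
                namespace[attr] = _swallow_exceptions(value)
        return super().__new__(mcs, name, bases, namespace)
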
def test_safe_patch_validates_autologging_runs_when_necessary_in_test_mode(
    patch_destination, test_autologging_integration
):
    assert autologging_utils._is_testing()

    def no_tag_run_patch_impl(original, *args, **kwargs):
        with mlflow.start_run(nested=True):
            return original(*args, **kwargs)

    safe_patch(test_autologging_integration, patch_destination, "fn", no_tag_run_patch_impl)

    with mock.patch(
        "mlflow.utils.autologging_utils._validate_autologging_run",
        wraps=_validate_autologging_run,
    ) as validate_run_mock:
        with pytest.raises(
            AssertionError, match="failed to set autologging tag with expected value"
        ):
            patch_destination.fn()
        assert validate_run_mock.call_count == 1

        validate_run_mock.reset_mock()

        with mlflow.start_run(nested=True):
            # If a user-generated run existed prior to the autologged training session, we
            # expect that safe patch will not attempt to validate it
            patch_destination.fn()
        assert not validate_run_mock.called

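# A minimal sketch of the check `_validate_autologging_run` presumably performs,
# inferred from the assertion message matched above: the run created by the patch
# implementation must carry an autologging tag identifying the integration. The tag
# key and the lookup via MlflowClient are hypothetical details for illustration.
from mlflow.tracking import MlflowClient

AUTOLOGGING_TAG_KEY = "mlflow.autologging"  # hypothetical tag key

def _validate_autologging_run_sketch(autologging_integration, run_id):
    run = MlflowClient().get_run(run_id)
    autologging_tag_value = run.data.tags.get(AUTOLOGGING_TAG_KEY)
    assert autologging_tag_value == autologging_integration, (
        "Autologging run with id {} failed to set autologging tag with expected value".format(
            run_id
        )
    )
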
def test_try_mlflow_log_emits_exceptions_as_warnings_in_standard_mode():
    assert not autologging_utils._is_testing()

    def throwing_function():
        raise Exception("bad implementation")

    with pytest.warns(UserWarning, match="bad implementation"):
        try_mlflow_log(throwing_function)

def test_try_mlflow_log_propagates_exceptions_in_test_mode():
    assert autologging_utils._is_testing()

    def throwing_function():
        raise Exception("bad implementation")

    with pytest.raises(Exception, match="bad implementation"):
        try_mlflow_log(throwing_function)

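# A minimal sketch of `try_mlflow_log` consistent with the two tests above: it invokes
# a logging function, re-raising failures in test mode and downgrading them to
# UserWarnings otherwise. Illustrative assumption, not MLflow's actual implementation.
import warnings

from mlflow.utils.autologging_utils import _is_testing

def try_mlflow_log_sketch(fn, *args, **kwargs):
    try:
        return fn(*args, **kwargs)
    except Exception as e:  # pylint: disable=broad-except
        if _is_testing():
            raise
        # warnings.warn emits a UserWarning by default, matching the standard-mode test
        warnings.warn("Logging to MLflow failed: " + str(e))
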
@pytest.fixture
def test_mode_off():
    try:
        prev_env_var_value = os.environ.pop(_AUTOLOGGING_TEST_MODE_ENV_VAR, None)
        os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = "false"
        assert not _is_testing()
        yield
    finally:
        if prev_env_var_value is not None:
            os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = prev_env_var_value
        else:
            del os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR]

def test_safe_patch_does_not_throw_when_autologging_runs_are_leaked_in_standard_mode(
    patch_destination, test_autologging_integration
):
    assert not autologging_utils._is_testing()

    def leak_run_patch_impl(original, *args, **kwargs):
        mlflow.start_run(nested=True)

    safe_patch(test_autologging_integration, patch_destination, "fn", leak_run_patch_impl)
    patch_destination.fn()
    assert mlflow.active_run()

    # End the leaked run
    mlflow.end_run()
    assert not mlflow.active_run()

def setup_autologging(module):
    try:
        autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
        autologging_params = get_autologging_params(autolog_fn)
        autolog_fn(**autologging_params)
        _logger.info("Autologging successfully enabled for %s.", module.__name__)
    except Exception as e:  # pylint: disable=broad-except
        if _is_testing():
            # Raise unexpected exceptions in test mode in order to detect
            # errors within dependent autologging integrations
            raise
        else:
            _logger.warning(
                "Exception raised while enabling autologging for %s: %s",
                module.__name__,
                str(e),
            )

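# `get_autologging_params` is not shown in this section. A minimal sketch of what it
# presumably does, mirroring the inlined variant of `setup_autologging` in the
# `autolog` body further below: inspect the integration's autolog function and forward
# only the `mlflow.autolog` arguments it actually accepts. The helper name and the
# `locals_copy` parameter are assumptions drawn from that variant.
import inspect

def get_autologging_params_sketch(autolog_fn, locals_copy):
    try:
        needed_params = list(inspect.signature(autolog_fn).parameters.keys())
        return {k: v for k, v in locals_copy if k in needed_params}
    except Exception:  # pylint: disable=broad-except
        return {}
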
@pytest.fixture(autouse=True)
def enable_test_mode_by_default_for_autologging_integrations():
    """
    Run all MLflow tests in autologging test mode, ensuring that errors in autologging patch
    code are raised and detected. For more information about autologging test mode, see the
    docstring for :py:func:`mlflow.utils.autologging_utils._is_testing()`.
    """
    try:
        prev_env_var_value = os.environ.pop(_AUTOLOGGING_TEST_MODE_ENV_VAR, None)
        os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = "true"
        assert _is_testing()
        yield
    finally:
        if prev_env_var_value is not None:
            os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = prev_env_var_value
        else:
            del os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR]

def test_exception_safe_class_exhibits_expected_behavior_in_test_mode():
    assert autologging_utils._is_testing()

    class NonThrowingClass(metaclass=ExceptionSafeClass):
        def function(self):
            return 10

    assert NonThrowingClass().function() == 10

    exc_to_throw = Exception("function error")

    class ThrowingClass(metaclass=ExceptionSafeClass):
        def function(self):
            raise exc_to_throw

    with pytest.raises(Exception) as exc:
        ThrowingClass().function()

    assert exc.value == exc_to_throw

def test_exception_safe_function_exhibits_expected_behavior_in_test_mode():
    assert autologging_utils._is_testing()

    @exception_safe_function
    def non_throwing_function():
        return 10

    assert non_throwing_function() == 10

    exc_to_throw = Exception("function error")

    @exception_safe_function
    def throwing_function():
        raise exc_to_throw

    with pytest.raises(Exception) as exc:
        throwing_function()

    assert exc.value == exc_to_throw

def test_exception_safe_function_exhibits_expected_behavior_in_standard_mode():
    assert not autologging_utils._is_testing()

    @exception_safe_function
    def non_throwing_function():
        return 10

    assert non_throwing_function() == 10

    exc_to_throw = Exception("bad implementation")

    @exception_safe_function
    def throwing_function():
        raise exc_to_throw

    with mock.patch("mlflow.utils.autologging_utils._logger.warning") as logger_mock:
        throwing_function()

    assert logger_mock.call_count == 1
    message, formatting_arg = logger_mock.call_args[0]
    assert "unexpected error during autologging" in message
    assert formatting_arg == exc_to_throw

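# A minimal sketch of an `exception_safe_function` decorator consistent with the two
# tests above; it is the function-level analogue of the metaclass sketched earlier.
# Results pass through, exceptions propagate in test mode, and in standard mode they
# are logged as warnings. Illustrative assumption, not MLflow's actual implementation
# (the tests patch `mlflow.utils.autologging_utils._logger`; this sketch uses its own).
import functools
import logging

from mlflow.utils.autologging_utils import _is_testing

_sketch_logger = logging.getLogger(__name__)

def exception_safe_function_sketch(func):
    @functools.wraps(func)
    def safe_func(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:  # pylint: disable=broad-except
            if _is_testing():
                raise
            _sketch_logger.warning("Encountered unexpected error during autologging: %s", e)

    return safe_func
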
def test_safe_patch_does_not_validate_autologging_runs_in_standard_mode(
    patch_destination, test_autologging_integration
):
    assert not autologging_utils._is_testing()

    def no_tag_run_patch_impl(original, *args, **kwargs):
        with mlflow.start_run(nested=True):
            return original(*args, **kwargs)

    safe_patch(test_autologging_integration, patch_destination, "fn", no_tag_run_patch_impl)

    with mock.patch(
        "mlflow.utils.autologging_utils._validate_autologging_run",
        wraps=_validate_autologging_run,
    ) as validate_run_mock:
        patch_destination.fn()

        with mlflow.start_run(nested=True):
            # If a user-generated run existed prior to the autologged training session, we
            # expect that safe patch will not attempt to validate it
            patch_destination.fn()

        assert not validate_run_mock.called

def test_safe_patch_throws_when_autologging_runs_are_leaked_in_test_mode(
    patch_destination, test_autologging_integration
):
    assert autologging_utils._is_testing()

    def leak_run_patch_impl(original, *args, **kwargs):
        mlflow.start_run(nested=True)

    safe_patch(test_autologging_integration, patch_destination, "fn", leak_run_patch_impl)
    with pytest.raises(AssertionError, match="leaked an active run"):
        patch_destination.fn()

    # End the leaked run
    mlflow.end_run()

    with mlflow.start_run():
        # If a user-generated run existed prior to the autologged training session, we expect
        # that safe patch will not throw a leaked run exception
        patch_destination.fn()
        # End the leaked nested run
        mlflow.end_run()

    assert not mlflow.active_run()

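# A minimal sketch of the leaked-run check that `safe_patch` presumably performs in
# test mode, inferred from the "leaked an active run" assertion matched above. The
# helper name and its `preexisting_run` parameter are hypothetical.
import mlflow

def _assert_patch_did_not_leak_run(preexisting_run):
    # Only enforce the check when the patch implementation, rather than the user,
    # is responsible for whatever run is currently active
    if preexisting_run is None:
        assert mlflow.active_run() is None, "Autologging integration leaked an active run"
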
def autolog(
    log_input_examples=False,
    log_model_signatures=True,
    log_models=True,
    disable=False,
    exclusive=False,
):  # pylint: disable=unused-argument
    """
    Enables (or disables) and configures autologging for all supported integrations.

    The parameters are passed to any autologging integrations that support them.

    See the :ref:`tracking docs <automatic-logging>` for a list of supported autologging
    integrations.

    :param log_input_examples: If ``True``, input examples from training datasets are collected
                               and logged along with model artifacts during training. If
                               ``False``, input examples are not logged.
                               Note: Input examples are MLflow model attributes
                               and are only collected if ``log_models`` is also ``True``.
    :param log_model_signatures: If ``True``,
                                 :py:class:`ModelSignatures <mlflow.models.ModelSignature>`
                                 describing model inputs and outputs are collected and logged
                                 along with model artifacts during training. If ``False``,
                                 signatures are not logged.
                                 Note: Model signatures are MLflow model attributes
                                 and are only collected if ``log_models`` is also ``True``.
    :param log_models: If ``True``, trained models are logged as MLflow model artifacts.
                       If ``False``, trained models are not logged. Input examples and model
                       signatures, which are attributes of MLflow models, are also omitted
                       when ``log_models`` is ``False``.
    :param disable: If ``True``, disables all supported autologging integrations. If ``False``,
                    enables all supported autologging integrations.
    :param exclusive: If ``True``, autologged content is not logged to user-created fluent runs.
                      If ``False``, autologged content is logged to the active fluent run,
                      which may be user-created.

    .. code-block:: python
        :caption: Example

        import numpy as np
        import mlflow.sklearn
        from mlflow.tracking import MlflowClient
        from sklearn.linear_model import LinearRegression

        def print_auto_logged_info(r):
            tags = {k: v for k, v in r.data.tags.items() if not k.startswith("mlflow.")}
            artifacts = [f.path for f in MlflowClient().list_artifacts(r.info.run_id, "model")]
            print("run_id: {}".format(r.info.run_id))
            print("artifacts: {}".format(artifacts))
            print("params: {}".format(r.data.params))
            print("metrics: {}".format(r.data.metrics))
            print("tags: {}".format(tags))

        # prepare training data
        X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
        y = np.dot(X, np.array([1, 2])) + 3

        # Auto log all the parameters, metrics, and artifacts
        mlflow.autolog()
        model = LinearRegression()
        with mlflow.start_run() as run:
            model.fit(X, y)

        # fetch the auto logged parameters and metrics for ended run
        print_auto_logged_info(mlflow.get_run(run_id=run.info.run_id))

    .. code-block:: text
        :caption: Output

        run_id: fd10a17d028c47399a55ab8741721ef7
        artifacts: ['model/MLmodel', 'model/conda.yaml', 'model/model.pkl']
        params: {'copy_X': 'True', 'normalize': 'False', 'fit_intercept': 'True', 'n_jobs': 'None'}
        metrics: {'training_score': 1.0, 'training_rmse': 4.440892098500626e-16, 'training_r2_score': 1.0, 'training_mae': 2.220446049250313e-16, 'training_mse': 1.9721522630525295e-31}
        tags: {'estimator_class': 'sklearn.linear_model._base.LinearRegression', 'estimator_name': 'LinearRegression'}
    """
    locals_copy = locals().items()

    # Mapping of library module name to specific autolog function
    # eg: mxnet.gluon is the actual library, mlflow.gluon.autolog is our autolog function for it
    LIBRARY_TO_AUTOLOG_FN = {
        "tensorflow": tensorflow.autolog,
        "keras": keras.autolog,
        "mxnet.gluon": gluon.autolog,
        "xgboost": xgboost.autolog,
        "lightgbm": lightgbm.autolog,
        "statsmodels": statsmodels.autolog,
        "sklearn": sklearn.autolog,
        "fastai": fastai.autolog,
        "pyspark": spark.autolog,
        # TODO: Broaden this beyond pytorch_lightning as we add autologging support for more
        # Pytorch frameworks under mlflow.pytorch.autolog
        "pytorch_lightning": pytorch.autolog,
    }

    def setup_autologging(module):
        try:
            autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
            try:
                needed_params = list(inspect.signature(autolog_fn).parameters.keys())
                filtered = {k: v for k, v in locals_copy if k in needed_params}
            except Exception:  # pylint: disable=broad-except
                filtered = {}
            autolog_fn(**filtered)
            _logger.info("Autologging successfully enabled for %s.", module.__name__)
        except Exception as e:  # pylint: disable=broad-except
            if _is_testing():
                # Raise unexpected exceptions in test mode in order to detect
                # errors within dependent autologging integrations
                raise
            else:
                _logger.warning(
                    "Exception raised while enabling autologging for %s: %s",
                    module.__name__,
                    str(e),
                )

    # for each autolog library (except pyspark), register a post-import hook.
    # this way, we do not send any errors to the user until we know they are using the library.
    # the post-import hook also retroactively activates for previously-imported libraries.
    for module in list(set(LIBRARY_TO_AUTOLOG_FN.keys()) - set(["pyspark"])):
        register_post_import_hook(setup_autologging, module, overwrite=True)

    # for pyspark, we activate autologging immediately, without waiting for a module import.
    # this is because on Databricks a SparkSession already exists and the user can directly
    # interact with it, and this activity should be logged.
    try:
        spark.autolog()
    except ImportError as ie:
        # if pyspark isn't installed, a user could potentially install it in the middle
        # of their session so we want to enable autologging once they do
        if "pyspark" in str(ie):
            register_post_import_hook(setup_autologging, "pyspark", overwrite=True)
    except Exception as e:  # pylint: disable=broad-except
        if _is_testing():
            # Raise unexpected exceptions in test mode in order to detect
            # errors within dependent autologging integrations
            raise
        else:
            _logger.warning("Exception raised while enabling autologging for spark: %s", str(e))

@pytest.fixture
def test_mode_on():
    with mock.patch("mlflow.utils.autologging_utils._is_testing") as testing_mock:
        testing_mock.return_value = True
        assert autologging_utils._is_testing()
        yield