Example #1
def test_is_testing_respects_environment_variable():
    try:
        prev_env_var_value = os.environ.pop("MLFLOW_AUTOLOGGING_TESTING", None)
        assert not _is_testing()

        os.environ["MLFLOW_AUTOLOGGING_TESTING"] = "false"
        assert not _is_testing()

        os.environ["MLFLOW_AUTOLOGGING_TESTING"] = "true"
        assert _is_testing()
    finally:
        # Restore the original value; pop() avoids a KeyError if the variable
        # is absent (e.g., when an assertion failed before it was re-set)
        if prev_env_var_value is not None:
            os.environ["MLFLOW_AUTOLOGGING_TESTING"] = prev_env_var_value
        else:
            os.environ.pop("MLFLOW_AUTOLOGGING_TESTING", None)
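
The test above implies that _is_testing simply mirrors the MLFLOW_AUTOLOGGING_TESTING environment variable. A minimal sketch of that check, assuming the variable is parsed as a lowercase boolean string (an illustration, not mlflow's verbatim implementation):

import os

def _is_testing():
    # Sketch: unset or "false" means standard mode; "true" enables
    # autologging test mode, matching the assertions in Example #1
    return os.environ.get("MLFLOW_AUTOLOGGING_TESTING", "false").lower() == "true"
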
Example #2
def test_exception_safe_class_exhibits_expected_behavior_in_standard_mode(
        baseclass, metaclass):
    assert not autologging_utils._is_testing()

    class NonThrowingClass(baseclass, metaclass=metaclass):
        def function(self):
            return 10

    assert NonThrowingClass().function() == 10

    exc_to_throw = Exception("function error")

    class ThrowingClass(baseclass, metaclass=metaclass):
        def function(self):
            raise exc_to_throw

    with mock.patch(
            "mlflow.utils.autologging_utils._logger.warning") as logger_mock:
        ThrowingClass().function()

        assert logger_mock.call_count == 1

        message, formatting_arg = logger_mock.call_args[0]
        assert "unexpected error during autologging" in message
        assert formatting_arg == exc_to_throw
Example #3
def test_safe_patch_validates_autologging_runs_when_necessary_in_test_mode(
        patch_destination, test_autologging_integration):
    assert autologging_utils._is_testing()

    def no_tag_run_patch_impl(original, *args, **kwargs):
        with mlflow.start_run(nested=True):
            return original(*args, **kwargs)

    safe_patch(test_autologging_integration, patch_destination, "fn",
               no_tag_run_patch_impl)

    with mock.patch("mlflow.utils.autologging_utils._validate_autologging_run",
                    wraps=_validate_autologging_run) as validate_run_mock:

        with pytest.raises(
                AssertionError,
                match="failed to set autologging tag with expected value"):
            patch_destination.fn()
        # patch_destination.fn() raises, so check the call count after the
        # raises block (a statement after the raising call would never run)
        assert validate_run_mock.call_count == 1

        validate_run_mock.reset_mock()

        with mlflow.start_run(nested=True):
            # If a user-generated run existed prior to the autologged training session, we expect
            # that safe patch will not attempt to validate it
            patch_destination.fn()
            assert not validate_run_mock.called
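
The expected AssertionError message above comes from autologging run validation. A rough sketch of the check _validate_autologging_run appears to perform, inferred from this test; the tag key "mlflow.autologging" and the exact message are assumptions, not verbatim mlflow code:

import mlflow

def _validate_autologging_run(autologging_integration, run_id):
    # Sketch: an autologging-created run should carry a tag identifying
    # the integration that produced it
    run = mlflow.get_run(run_id)
    autologging_tag_value = run.data.tags.get("mlflow.autologging")
    assert autologging_tag_value == autologging_integration, (
        "Run {} failed to set autologging tag with expected value".format(run_id)
    )
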
Example #4
def test_try_mlflow_log_emits_exceptions_as_warnings_in_standard_mode():
    assert not autologging_utils._is_testing()

    def throwing_function():
        raise Exception("bad implementation")

    with pytest.warns(UserWarning, match="bad implementation"):
        try_mlflow_log(throwing_function)
Example #5
def test_try_mlflow_log_propagates_exceptions_in_test_mode():
    assert autologging_utils._is_testing()

    def throwing_function():
        raise Exception("bad implementation")

    with pytest.raises(Exception, match="bad implementation"):
        try_mlflow_log(throwing_function)
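
Taken together, Examples #4 and #5 pin down the contract of try_mlflow_log: exceptions propagate in test mode and degrade to UserWarnings in standard mode. A minimal sketch of that contract (an illustration, not mlflow's verbatim code):

import warnings

from mlflow.utils.autologging_utils import _is_testing

def try_mlflow_log(fn, *args, **kwargs):
    # Sketch: invoke an MLflow logging call, re-raising failures in test
    # mode and downgrading them to warnings in standard mode
    try:
        return fn(*args, **kwargs)
    except Exception as e:
        if _is_testing():
            raise
        warnings.warn("MLflow autologging encountered an error: {}".format(e))
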
Example #6
@pytest.fixture
def test_mode_off():
    try:
        prev_env_var_value = os.environ.pop(_AUTOLOGGING_TEST_MODE_ENV_VAR,
                                            None)
        os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = "false"
        assert not _is_testing()
        yield
    finally:
        if prev_env_var_value is not None:
            os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = prev_env_var_value
        else:
            os.environ.pop(_AUTOLOGGING_TEST_MODE_ENV_VAR, None)
Example #7
def test_safe_patch_does_not_throw_when_autologging_runs_are_leaked_in_standard_mode(
        patch_destination, test_autologging_integration):
    assert not autologging_utils._is_testing()

    def leak_run_patch_impl(original, *args, **kwargs):
        mlflow.start_run(nested=True)

    safe_patch(test_autologging_integration, patch_destination, "fn",
               leak_run_patch_impl)
    patch_destination.fn()
    assert mlflow.active_run()

    # End the leaked run
    mlflow.end_run()

    assert not mlflow.active_run()
Example #8
def setup_autologging(module):
    try:
        autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
        autologging_params = get_autologging_params(autolog_fn)
        autolog_fn(**autologging_params)
        _logger.info("Autologging successfully enabled for %s.", module.__name__)
    except Exception as e:  # pylint: disable=broad-except
        if _is_testing():
            # Raise unexpected exceptions in test mode in order to detect
            # errors within dependent autologging integrations
            raise
        else:
            _logger.warning(
                "Exception raised while enabling autologging for %s: %s",
                module.__name__,
                str(e),
            )
Example #9
File: conftest.py, Project: radzak/mlflow
@pytest.fixture(autouse=True)
def enable_test_mode_by_default_for_autologging_integrations():
    """
    Run all MLflow tests in autologging test mode, ensuring that errors in autologging patch code
    are raised and detected. For more information about autologging test mode, see the docstring
    for :py:func:`mlflow.utils.autologging_utils._is_testing()`.
    """
    try:
        prev_env_var_value = os.environ.pop(_AUTOLOGGING_TEST_MODE_ENV_VAR,
                                            None)
        os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = "true"
        assert _is_testing()
        yield
    finally:
        if prev_env_var_value is not None:
            os.environ[_AUTOLOGGING_TEST_MODE_ENV_VAR] = prev_env_var_value
        else:
            os.environ.pop(_AUTOLOGGING_TEST_MODE_ENV_VAR, None)
Example #10
def test_exception_safe_class_exhibits_expected_behavior_in_test_mode():
    assert autologging_utils._is_testing()

    class NonThrowingClass(metaclass=ExceptionSafeClass):
        def function(self):
            return 10

    assert NonThrowingClass().function() == 10

    exc_to_throw = Exception("function error")

    class ThrowingClass(metaclass=ExceptionSafeClass):
        def function(self):
            raise exc_to_throw

    with pytest.raises(Exception) as exc:
        ThrowingClass().function()

    assert exc.value == exc_to_throw
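
Together with Example #2, this test pins down ExceptionSafeClass: methods raise in test mode and log a warning in standard mode. A hypothetical metaclass sketch with that behavior (not mlflow's actual implementation; the warning text mirrors the substring these tests assert on):

import functools

from mlflow.utils.autologging_utils import _is_testing, _logger

class ExceptionSafeClass(type):
    def __new__(mcs, name, bases, namespace):
        def make_safe(fn):
            @functools.wraps(fn)
            def safe(*args, **kwargs):
                try:
                    return fn(*args, **kwargs)
                except Exception as e:
                    if _is_testing():
                        raise
                    _logger.warning(
                        "Encountered unexpected error during autologging: %s", e)
            return safe

        # Wrap every public method so exceptions cannot escape in standard mode
        for attr, value in list(namespace.items()):
            if callable(value) and not attr.startswith("__"):
                namespace[attr] = make_safe(value)
        return super().__new__(mcs, name, bases, namespace)
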
Example #11
def test_exception_safe_function_exhibits_expected_behavior_in_test_mode():
    assert autologging_utils._is_testing()

    @exception_safe_function
    def non_throwing_function():
        return 10

    assert non_throwing_function() == 10

    exc_to_throw = Exception("function error")

    @exception_safe_function
    def throwing_function():
        raise exc_to_throw

    with pytest.raises(Exception) as exc:
        throwing_function()

    assert exc.value == exc_to_throw
Example #12
def test_exception_safe_function_exhibits_expected_behavior_in_standard_mode():
    assert not autologging_utils._is_testing()

    @exception_safe_function
    def non_throwing_function():
        return 10

    assert non_throwing_function() == 10

    exc_to_throw = Exception("bad implementation")

    @exception_safe_function
    def throwing_function():
        raise exc_to_throw

    with mock.patch("mlflow.utils.autologging_utils._logger.warning") as logger_mock:
        throwing_function()
        assert logger_mock.call_count == 1
        message, formatting_arg = logger_mock.call_args[0]
        assert "unexpected error during autologging" in message
        assert formatting_arg == exc_to_throw
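
Examples #11 and #12 specify the same two-mode behavior for the function decorator; it is the per-function analogue of the metaclass sketched after Example #10. A minimal sketch (an illustration, not mlflow's verbatim code):

import functools

from mlflow.utils.autologging_utils import _is_testing, _logger

def exception_safe_function(func):
    @functools.wraps(func)
    def safe_function(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            if _is_testing():
                raise
            # Message mirrors the substring asserted in Example #12
            _logger.warning(
                "Encountered unexpected error during autologging: %s", e)
    return safe_function
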
Example #13
def test_safe_patch_does_not_validate_autologging_runs_in_standard_mode(
        patch_destination, test_autologging_integration):
    assert not autologging_utils._is_testing()

    def no_tag_run_patch_impl(original, *args, **kwargs):
        with mlflow.start_run(nested=True):
            return original(*args, **kwargs)

    safe_patch(test_autologging_integration, patch_destination, "fn",
               no_tag_run_patch_impl)

    with mock.patch("mlflow.utils.autologging_utils._validate_autologging_run",
                    wraps=_validate_autologging_run) as validate_run_mock:

        patch_destination.fn()

        with mlflow.start_run(nested=True):
            # If a user-generated run existed prior to the autologged training session, we expect
            # that safe patch will not attempt to validate it
            patch_destination.fn()

        assert not validate_run_mock.called
Example #14
def test_safe_patch_throws_when_autologging_runs_are_leaked_in_test_mode(
        patch_destination, test_autologging_integration):
    assert autologging_utils._is_testing()

    def leak_run_patch_impl(original, *args, **kwargs):
        mlflow.start_run(nested=True)

    safe_patch(test_autologging_integration, patch_destination, "fn",
               leak_run_patch_impl)
    with pytest.raises(AssertionError, match="leaked an active run"):
        patch_destination.fn()

    # End the leaked run
    mlflow.end_run()

    with mlflow.start_run():
        # If a user-generated run existed prior to the autologged training session, we expect
        # that safe patch will not throw a leaked run exception
        patch_destination.fn()
        # End the leaked nested run
        mlflow.end_run()

    assert not mlflow.active_run()
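
Examples #7 and #14 together describe the leak check safe_patch performs after a patch implementation returns. A rough sketch of that check, assuming safe_patch snapshots mlflow.active_run() before invoking the patch (the helper name and message are hypothetical):

import mlflow
from mlflow.utils.autologging_utils import _is_testing

def _assert_no_leaked_runs(run_was_active_before_patch):
    # Sketch: only enforced in test mode, and only when no user-created run
    # was active before the patched function ran
    if _is_testing() and not run_was_active_before_patch:
        assert mlflow.active_run() is None, (
            "Autologging integration leaked an active run")
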
Example #15
def setup_autologging(module):
    try:
        autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
        try:
            needed_params = list(
                inspect.signature(autolog_fn).parameters.keys())
            filtered = {k: v for k, v in locals_copy if k in needed_params}
        except Exception:  # pylint: disable=broad-except
            filtered = {}

        autolog_fn(**filtered)
        _logger.info("Autologging successfully enabled for %s.",
                     module.__name__)
    except Exception as e:  # pylint: disable=broad-except
        if _is_testing():
            # Raise unexpected exceptions in test mode in order to detect
            # errors within dependent autologging integrations
            raise
        else:
            _logger.warning(
                "Exception raised while enabling autologging for %s: %s",
                module.__name__,
                str(e),
            )
Example #16
def autolog(
    log_input_examples=False,
    log_model_signatures=True,
    log_models=True,
    disable=False,
    exclusive=False,
):  # pylint: disable=unused-argument
    """
    Enables (or disables) and configures autologging for all supported integrations.

    The parameters are passed to any autologging integrations that support them.

    See the :ref:`tracking docs <automatic-logging>` for a list of supported autologging
    integrations.

    :param log_input_examples: If ``True``, input examples from training datasets are collected and
                               logged along with model artifacts during training. If ``False``,
                               input examples are not logged.
                               Note: Input examples are MLflow model attributes
                               and are only collected if ``log_models`` is also ``True``.
    :param log_model_signatures: If ``True``,
                                 :py:class:`ModelSignatures <mlflow.models.ModelSignature>`
                                 describing model inputs and outputs are collected and logged along
                                 with model artifacts during training. If ``False``, signatures are
                                 not logged. Note: Model signatures are MLflow model attributes
                                 and are only collected if ``log_models`` is also ``True``.
    :param log_models: If ``True``, trained models are logged as MLflow model artifacts.
                       If ``False``, trained models are not logged.
                       Input examples and model signatures, which are attributes of MLflow models,
                       are also omitted when ``log_models`` is ``False``.
    :param disable: If ``True``, disables all supported autologging integrations. If ``False``,
                    enables all supported autologging integrations.
    :param exclusive: If ``True``, autologged content is not logged to user-created fluent runs.
                      If ``False``, autologged content is logged to the active fluent run,
                      which may be user-created.

    .. code-block:: python
        :caption: Example

        import numpy as np
        import mlflow.sklearn
        from mlflow.tracking import MlflowClient
        from sklearn.linear_model import LinearRegression

        def print_auto_logged_info(r):
            tags = {k: v for k, v in r.data.tags.items() if not k.startswith("mlflow.")}
            artifacts = [f.path for f in MlflowClient().list_artifacts(r.info.run_id, "model")]
            print("run_id: {}".format(r.info.run_id))
            print("artifacts: {}".format(artifacts))
            print("params: {}".format(r.data.params))
            print("metrics: {}".format(r.data.metrics))
            print("tags: {}".format(tags))

        # prepare training data
        X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
        y = np.dot(X, np.array([1, 2])) + 3

        # Auto log all the parameters, metrics, and artifacts
        mlflow.autolog()
        model = LinearRegression()
        with mlflow.start_run() as run:
            model.fit(X, y)

        # fetch the auto logged parameters and metrics for ended run
        print_auto_logged_info(mlflow.get_run(run_id=run.info.run_id))

    .. code-block:: text
        :caption: Output

        run_id: fd10a17d028c47399a55ab8741721ef7
        artifacts: ['model/MLmodel', 'model/conda.yaml', 'model/model.pkl']
        params: {'copy_X': 'True',
                 'normalize': 'False',
                 'fit_intercept': 'True',
                 'n_jobs': 'None'}
        metrics: {'training_score': 1.0,
                  'training_rmse': 4.440892098500626e-16,
                  'training_r2_score': 1.0,
                  'training_mae': 2.220446049250313e-16,
                  'training_mse': 1.9721522630525295e-31}
        tags: {'estimator_class': 'sklearn.linear_model._base.LinearRegression',
               'estimator_name': 'LinearRegression'}
    """
    locals_copy = locals().items()

    # Mapping of library module name to specific autolog function
    # eg: mxnet.gluon is the actual library, mlflow.gluon.autolog is our autolog function for it
    LIBRARY_TO_AUTOLOG_FN = {
        "tensorflow": tensorflow.autolog,
        "keras": keras.autolog,
        "mxnet.gluon": gluon.autolog,
        "xgboost": xgboost.autolog,
        "lightgbm": lightgbm.autolog,
        "statsmodels": statsmodels.autolog,
        "sklearn": sklearn.autolog,
        "fastai": fastai.autolog,
        "pyspark": spark.autolog,
        # TODO: Broaden this beyond pytorch_lightning as we add autologging support for more
        # Pytorch frameworks under mlflow.pytorch.autolog
        "pytorch_lightning": pytorch.autolog,
    }

    def setup_autologging(module):
        try:
            autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
            try:
                needed_params = list(
                    inspect.signature(autolog_fn).parameters.keys())
                filtered = {k: v for k, v in locals_copy if k in needed_params}
            except Exception:  # pylint: disable=broad-except
                filtered = {}

            autolog_fn(**filtered)
            _logger.info("Autologging successfully enabled for %s.",
                         module.__name__)
        except Exception as e:  # pylint: disable=broad-except
            if _is_testing():
                # Raise unexpected exceptions in test mode in order to detect
                # errors within dependent autologging integrations
                raise
            else:
                _logger.warning(
                    "Exception raised while enabling autologging for %s: %s",
                    module.__name__,
                    str(e),
                )

    # for each autolog library (except pyspark), register a post-import hook.
    # this way, we do not send any errors to the user until we know they are using the library.
    # the post-import hook also retroactively activates for previously-imported libraries.
    for module in list(set(LIBRARY_TO_AUTOLOG_FN.keys()) - set(["pyspark"])):
        register_post_import_hook(setup_autologging, module, overwrite=True)

    # for pyspark, we activate autologging immediately, without waiting for a module import.
    # this is because on Databricks a SparkSession already exists and the user can directly
    #   interact with it, and this activity should be logged.
    try:
        spark.autolog()
    except ImportError as ie:
        # if pyspark isn't installed, a user could potentially install it in the middle
        #   of their session so we want to enable autologging once they do
        if "pyspark" in str(ie):
            register_post_import_hook(setup_autologging,
                                      "pyspark",
                                      overwrite=True)
    except Exception as e:  # pylint: disable=broad-except
        if _is_testing():
            # Raise unexpected exceptions in test mode in order to detect
            # errors within dependent autologging integrations
            raise
        else:
            _logger.warning(
                "Exception raised while enabling autologging for spark: %s",
                str(e))
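
The comments in autolog() above describe the post-import-hook mechanism; a small usage sketch may make it concrete. This assumes mlflow's vendored wrapt-style registry lives in mlflow.utils.import_hooks (the module path is an assumption; the call signature matches the usage in autolog() above):

from mlflow.utils.import_hooks import register_post_import_hook

def on_xgboost_import(module):
    print("Enabling autologging for", module.__name__)

# Fires the first time `import xgboost` executes; if xgboost was already
# imported, the hook fires immediately ("retroactively"), as noted in the
# comments above
register_post_import_hook(on_xgboost_import, "xgboost", overwrite=True)
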
Example #17
@pytest.fixture
def test_mode_on():
    with mock.patch("mlflow.utils.autologging_utils._is_testing") as testing_mock:
        testing_mock.return_value = True
        assert autologging_utils._is_testing()
        yield