Example no. 1
    def _call(self,
              *args,
              _pypads_env: LoggerEnv,
              _pypads_autologgers=None,
              _logger_call,
              _logger_output,
              **kwargs):
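        # Lazily enable MLflow's framework-specific autologging for every requested
        # framework that is already imported. Each integration is activated at most
        # once (tracked in the module-level added_auto_logs set) and only if the
        # corresponding package is actually installed.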
        if _pypads_autologgers is None:
            _pypads_autologgers = [
                "keras", "tensorflow", "xgboost", "gluon", "spark", "lightgbm",
                "sklearn"
            ]

        if 'tensorflow' in _pypads_autologgers and 'tensorflow' in sys.modules and 'tensorflow' not in added_auto_logs \
                and is_package_available('tensorflow'):
            added_auto_logs.add('tensorflow')
            from mlflow import tensorflow
            tensorflow.autolog()

        if 'keras' in _pypads_autologgers and 'keras' in sys.modules and 'keras' not in added_auto_logs \
                and is_package_available('keras'):
            added_auto_logs.add('keras')
            from mlflow import keras
            keras.autolog()

        if 'xgboost' in _pypads_autologgers and 'xgboost' in sys.modules and 'xgboost' not in added_auto_logs \
                and is_package_available('xgboost'):
            added_auto_logs.add('xgboost')
            from mlflow import xgboost
            xgboost.autolog()

        if 'gluon' in _pypads_autologgers and 'gluon' in sys.modules and 'gluon' not in added_auto_logs \
                and is_package_available('gluon'):
            added_auto_logs.add('gluon')
            from mlflow import gluon
            gluon.autolog()

        if 'spark' in _pypads_autologgers and 'spark' in sys.modules and 'spark' not in added_auto_logs \
                and is_package_available('pyspark'):
            added_auto_logs.add('spark')
            from mlflow import spark
            spark.autolog()

        if 'lightgbm' in _pypads_autologgers and 'lightgbm' in sys.modules and 'lightgbm' not in added_auto_logs \
                and is_package_available('lightgbm'):
            added_auto_logs.add('lightgbm')
            from mlflow import lightgbm
            lightgbm.autolog()

        if 'sklearn' in _pypads_autologgers and 'sklearn' in sys.modules and 'sklearn' not in added_auto_logs \
                and is_package_available('sklearn'):
            added_auto_logs.add('sklearn')
            from mlflow import sklearn
            sklearn.autolog()
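
Example 1 uses added_auto_logs and is_package_available without defining them; both belong to the surrounding PyPads module. The following is only a minimal sketch of what those helpers could look like, assuming a module-level registry set and an importlib-based availability check (the names come from the snippet, the implementation is an assumption):

import importlib.util

# Names of autologgers that have already been activated in this process.
added_auto_logs = set()

def is_package_available(name):
    # True if the package can be found on the current environment's import path.
    return importlib.util.find_spec(name) is not None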
Example no. 2
def autolog(log_input_example=False, log_model_signature=True):  # pylint: disable=unused-argument
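    # Snapshot of this function's keyword arguments; below they are forwarded
    # only to integration autolog functions whose signatures declare matching
    # parameter names.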
    locals_copy = locals().items()

    # Mapping of library module name to specific autolog function
    # eg: mxnet.gluon is the actual library, mlflow.gluon.autolog is our autolog function for it
    LIBRARY_TO_AUTOLOG_FN = {
        "tensorflow": tensorflow.autolog,
        "keras": keras.autolog,
        "mxnet.gluon": gluon.autolog,
        "xgboost": xgboost.autolog,
        "lightgbm": lightgbm.autolog,
        "sklearn": sklearn.autolog,
        "fastai": fastai.autolog,
        "pyspark": spark.autolog,
    }

    def setup_autologging(module):
        autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
        try:
            needed_params = list(inspect.signature(autolog_fn).parameters.keys())
            filtered = {k: v for k, v in locals_copy if k in needed_params}
        except ValueError:
            filtered = {}

        try:
            autolog_fn(**filtered)
            _logger.info("Autologging successfully enabled for %s.", module.__name__)
        except Exception as e:  # pylint: disable=broad-except
            _logger.warning(
                "Exception raised while enabling autologging for %s: %s", module.__name__, str(e)
            )

    # for each autolog library (except pyspark), register a post-import hook.
    # this way, we do not send any errors to the user until we know they are using the library.
    # the post-import hook also retroactively activates for previously-imported libraries.
    for module in list(set(LIBRARY_TO_AUTOLOG_FN.keys()) - set(["pyspark"])):
        register_post_import_hook(setup_autologging, module, overwrite=True)

    # for pyspark, we activate autologging immediately, without waiting for a module import.
    # this is because on Databricks a SparkSession already exists and the user can directly
    #   interact with it, and this activity should be logged.
    try:
        spark.autolog()
    except ImportError as ie:
        # if pyspark isn't installed, a user could potentially install it in the middle
        #   of their session so we want to enable autologging once they do
        if "pyspark" in str(ie):
            register_post_import_hook(setup_autologging, "pyspark", overwrite=True)
    except Exception as e:  # pylint: disable=broad-except
        _logger.warning("Exception raised while enabling autologging for spark: %s", str(e))
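
register_post_import_hook is not defined in the snippet; it comes from an internal MLflow import-hooks utility. As a rough illustration of the idea only, and not MLflow's actual implementation: a post-import hook runs a callback as soon as the named module is imported, and fires immediately (retroactively) if the module was already imported when the hook was registered. A simplified sketch using a wrapped builtins.__import__, with all names below hypothetical:

import builtins
import sys

_post_import_hooks = {}
_original_import = builtins.__import__

def _importing_with_hooks(name, *args, **kwargs):
    module = _original_import(name, *args, **kwargs)
    # Fire any pending callbacks registered for this module name, at most once.
    for callback in _post_import_hooks.pop(name, []):
        callback(sys.modules.get(name, module))
    return module

builtins.__import__ = _importing_with_hooks

def simple_register_post_import_hook(callback, module_name):
    if module_name in sys.modules:
        # The module was imported before the hook was registered:
        # activate the callback retroactively.
        callback(sys.modules[module_name])
    else:
        _post_import_hooks.setdefault(module_name, []).append(callback)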
Example no. 3
def autolog(
    log_input_examples=False,
    log_model_signatures=True,
    log_models=True,
    disable=False,
    exclusive=False,
):  # pylint: disable=unused-argument
    """
    Enables (or disables) and configures autologging for all supported integrations.

    The parameters are passed to any autologging integrations that support them.

    See the :ref:`tracking docs <automatic-logging>` for a list of supported autologging
    integrations.

    :param log_input_examples: If ``True``, input examples from training datasets are collected and
                               logged along with model artifacts during training. If ``False``,
                               input examples are not logged.
                               Note: Input examples are MLflow model attributes
                               and are only collected if ``log_models`` is also ``True``.
    :param log_model_signatures: If ``True``,
                                 :py:class:`ModelSignatures <mlflow.models.ModelSignature>`
                                 describing model inputs and outputs are collected and logged along
                                 with model artifacts during training. If ``False``, signatures are
                                 not logged. Note: Model signatures are MLflow model attributes
                                 and are only collected if ``log_models`` is also ``True``.
    :param log_models: If ``True``, trained models are logged as MLflow model artifacts.
                       If ``False``, trained models are not logged.
                       Input examples and model signatures, which are attributes of MLflow models,
                       are also omitted when ``log_models`` is ``False``.
    :param disable: If ``True``, disables all supported autologging integrations. If ``False``,
                    enables all supported autologging integrations.
    :param exclusive: If ``True``, autologged content is not logged to user-created fluent runs.
                      If ``False``, autologged content is logged to the active fluent run,
                      which may be user-created.

    .. code-block:: python
        :caption: Example

        import numpy as np
        import mlflow.sklearn
        from mlflow.tracking import MlflowClient
        from sklearn.linear_model import LinearRegression

        def print_auto_logged_info(r):
            tags = {k: v for k, v in r.data.tags.items() if not k.startswith("mlflow.")}
            artifacts = [f.path for f in MlflowClient().list_artifacts(r.info.run_id, "model")]
            print("run_id: {}".format(r.info.run_id))
            print("artifacts: {}".format(artifacts))
            print("params: {}".format(r.data.params))
            print("metrics: {}".format(r.data.metrics))
            print("tags: {}".format(tags))

        # prepare training data
        X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
        y = np.dot(X, np.array([1, 2])) + 3

        # Auto log all the parameters, metrics, and artifacts
        mlflow.autolog()
        model = LinearRegression()
        with mlflow.start_run() as run:
            model.fit(X, y)

        # fetch the auto logged parameters and metrics for ended run
        print_auto_logged_info(mlflow.get_run(run_id=run.info.run_id))

    .. code-block:: text
        :caption: Output

        run_id: fd10a17d028c47399a55ab8741721ef7
        artifacts: ['model/MLmodel', 'model/conda.yaml', 'model/model.pkl']
        params: {'copy_X': 'True',
                 'normalize': 'False',
                 'fit_intercept': 'True',
                 'n_jobs': 'None'}
        metrics: {'training_score': 1.0,
                  'training_rmse': 4.440892098500626e-16,
                  'training_r2_score': 1.0,
                  'training_mae': 2.220446049250313e-16,
                  'training_mse': 1.9721522630525295e-31}
        tags: {'estimator_class': 'sklearn.linear_model._base.LinearRegression',
               'estimator_name': 'LinearRegression'}
    """
    locals_copy = locals().items()

    # Mapping of library module name to specific autolog function
    # eg: mxnet.gluon is the actual library, mlflow.gluon.autolog is our autolog function for it
    LIBRARY_TO_AUTOLOG_FN = {
        "tensorflow": tensorflow.autolog,
        "keras": keras.autolog,
        "mxnet.gluon": gluon.autolog,
        "xgboost": xgboost.autolog,
        "lightgbm": lightgbm.autolog,
        "statsmodels": statsmodels.autolog,
        "sklearn": sklearn.autolog,
        "fastai": fastai.autolog,
        "pyspark": spark.autolog,
        # TODO: Broaden this beyond pytorch_lightning as we add autologging support for more
        # Pytorch frameworks under mlflow.pytorch.autolog
        "pytorch_lightning": pytorch.autolog,
    }

    def setup_autologging(module):
        try:
            autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
            try:
                needed_params = list(
                    inspect.signature(autolog_fn).parameters.keys())
                filtered = {k: v for k, v in locals_copy if k in needed_params}
            except Exception:  # pylint: disable=broad-except
                filtered = {}

            autolog_fn(**filtered)
            _logger.info("Autologging successfully enabled for %s.",
                         module.__name__)
        except Exception as e:  # pylint: disable=broad-except
            if _is_testing():
                # Raise unexpected exceptions in test mode in order to detect
                # errors within dependent autologging integrations
                raise
            else:
                _logger.warning(
                    "Exception raised while enabling autologging for %s: %s",
                    module.__name__,
                    str(e),
                )

    # for each autolog library (except pyspark), register a post-import hook.
    # this way, we do not send any errors to the user until we know they are using the library.
    # the post-import hook also retroactively activates for previously-imported libraries.
    for module in list(set(LIBRARY_TO_AUTOLOG_FN.keys()) - set(["pyspark"])):
        register_post_import_hook(setup_autologging, module, overwrite=True)

    # for pyspark, we activate autologging immediately, without waiting for a module import.
    # this is because on Databricks a SparkSession already exists and the user can directly
    #   interact with it, and this activity should be logged.
    try:
        spark.autolog()
    except ImportError as ie:
        # if pyspark isn't installed, a user could potentially install it in the middle
        #   of their session so we want to enable autologging once they do
        if "pyspark" in str(ie):
            register_post_import_hook(setup_autologging,
                                      "pyspark",
                                      overwrite=True)
    except Exception as e:  # pylint: disable=broad-except
        if _is_testing():
            # Raise unexpected exceptions in test mode in order to detect
            # errors within dependent autologging integrations
            raise
        else:
            _logger.warning(
                "Exception raised while enabling autologging for spark: %s",
                str(e))
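
The autolog functions in Examples 2 through 4 all rely on the same trick: locals() is captured at the top of the function, and setup_autologging then filters that snapshot against the signature of each integration's autolog function, so only parameters the integration actually declares are forwarded. A small self-contained illustration of that pattern, with made-up function names:

import inspect

def _filter_kwargs_for(fn, candidates):
    # Keep only the entries whose keys appear as parameters of fn.
    needed = inspect.signature(fn).parameters.keys()
    return {k: v for k, v in candidates.items() if k in needed}

def example_autolog(log_models=True):
    print("log_models =", log_models)

# "disable" is silently dropped because example_autolog does not accept it.
example_autolog(**_filter_kwargs_for(example_autolog, {"log_models": False, "disable": True}))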
Example no. 4
def autolog(log_input_examples=False, log_model_signatures=True):  # pylint: disable=unused-argument
    """
    Enable autologging for all supported integrations.

    The parameters are passed to any autologging integrations that support them.

    See the :ref:`tracking docs <automatic-logging>` for a list of supported autologging
    integrations.

    :param log_input_examples: If ``True``, input examples from training datasets are collected and
                               logged along with model artifacts during training. If ``False``,
                               input examples are not logged.
    :param log_model_signatures: If ``True``,
                                 :py:class:`ModelSignatures <mlflow.models.ModelSignature>`
                                 describing model inputs and outputs are collected and logged along
                                 with model artifacts during training. If ``False``, signatures are
                                 not logged.
    """
    locals_copy = locals().items()

    # Mapping of library module name to specific autolog function
    # eg: mxnet.gluon is the actual library, mlflow.gluon.autolog is our autolog function for it
    LIBRARY_TO_AUTOLOG_FN = {
        "tensorflow": tensorflow.autolog,
        "keras": keras.autolog,
        "mxnet.gluon": gluon.autolog,
        "xgboost": xgboost.autolog,
        "lightgbm": lightgbm.autolog,
        "sklearn": sklearn.autolog,
        "fastai": fastai.autolog,
        "pyspark": spark.autolog,
        # TODO: Broaden this beyond pytorch_lightning as we add autologging support for more
        # Pytorch frameworks under mlflow.pytorch.autolog
        "pytorch_lightning": pytorch.autolog,
    }

    def setup_autologging(module):
        autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
        try:
            needed_params = list(inspect.signature(autolog_fn).parameters.keys())
            filtered = {k: v for k, v in locals_copy if k in needed_params}
        except ValueError:
            filtered = {}

        try:
            autolog_fn(**filtered)
            _logger.info("Autologging successfully enabled for %s.", module.__name__)
        except Exception as e:  # pylint: disable=broad-except
            _logger.warning(
                "Exception raised while enabling autologging for %s: %s", module.__name__, str(e)
            )

    # for each autolog library (except pyspark), register a post-import hook.
    # this way, we do not send any errors to the user until we know they are using the library.
    # the post-import hook also retroactively activates for previously-imported libraries.
    for module in list(set(LIBRARY_TO_AUTOLOG_FN.keys()) - set(["pyspark"])):
        register_post_import_hook(setup_autologging, module, overwrite=True)

    # for pyspark, we activate autologging immediately, without waiting for a module import.
    # this is because on Databricks a SparkSession already exists and the user can directly
    #   interact with it, and this activity should be logged.
    try:
        spark.autolog()
    except ImportError as ie:
        # if pyspark isn't installed, a user could potentially install it in the middle
        #   of their session so we want to enable autologging once they do
        if "pyspark" in str(ie):
            register_post_import_hook(setup_autologging, "pyspark", overwrite=True)
    except Exception as e:  # pylint: disable=broad-except
        _logger.warning("Exception raised while enabling autologging for spark: %s", str(e))