def _call(self, *args, _pypads_env: LoggerEnv, _pypads_autologgers=None, _logger_call,
          _logger_output, **kwargs):
    # Enable mlflow's framework-specific autologging for every requested framework that is
    # already imported, installed as a package, and not yet activated in this process.
    if _pypads_autologgers is None:
        _pypads_autologgers = ["keras", "tensorflow", "xgboost", "gluon", "spark",
                               "lightgbm", "sklearn"]

    if 'tensorflow' in _pypads_autologgers and 'tensorflow' in sys.modules \
            and 'tensorflow' not in added_auto_logs and is_package_available('tensorflow'):
        added_auto_logs.add('tensorflow')
        from mlflow import tensorflow
        tensorflow.autolog()

    if 'keras' in _pypads_autologgers and 'keras' in sys.modules \
            and 'keras' not in added_auto_logs and is_package_available('keras'):
        added_auto_logs.add('keras')
        from mlflow import keras
        keras.autolog()

    if 'xgboost' in _pypads_autologgers and 'xgboost' in sys.modules \
            and 'xgboost' not in added_auto_logs and is_package_available('xgboost'):
        added_auto_logs.add('xgboost')
        from mlflow import xgboost
        xgboost.autolog()

    if 'gluon' in _pypads_autologgers and 'gluon' in sys.modules \
            and 'gluon' not in added_auto_logs and is_package_available('gluon'):
        added_auto_logs.add('gluon')
        from mlflow import gluon
        gluon.autolog()

    if 'spark' in _pypads_autologgers and 'spark' in sys.modules \
            and 'spark' not in added_auto_logs and is_package_available('pyspark'):
        added_auto_logs.add('spark')
        from mlflow import spark
        spark.autolog()

    if 'lightgbm' in _pypads_autologgers and 'lightgbm' in sys.modules \
            and 'lightgbm' not in added_auto_logs and is_package_available('lightgbm'):
        added_auto_logs.add('lightgbm')
        from mlflow import lightgbm
        lightgbm.autolog()

    if 'sklearn' in _pypads_autologgers and 'sklearn' in sys.modules \
            and 'sklearn' not in added_auto_logs and is_package_available('sklearn'):
        added_auto_logs.add('sklearn')
        from mlflow import sklearn
        sklearn.autolog()
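# ---------------------------------------------------------------------------
# Illustrative sketch only: the logger above relies on module-level state
# (`added_auto_logs`) and a helper (`is_package_available`) defined elsewhere in
# PyPads. The definitions below are assumptions showing one plausible shape of
# that state, not the actual PyPads implementation.
import importlib.util
import sys

# Frameworks for which mlflow autologging has already been enabled in this
# process, so repeated calls to the logger do not re-enable an integration.
added_auto_logs = set()


def is_package_available(name):
    # True if `name` can be imported in the current environment.
    return importlib.util.find_spec(name) is not None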
def autolog(log_input_example=False, log_model_signature=True):  # pylint: disable=unused-argument
    locals_copy = locals().items()

    # Mapping of library module name to specific autolog function
    # eg: mxnet.gluon is the actual library, mlflow.gluon.autolog is our autolog function for it
    LIBRARY_TO_AUTOLOG_FN = {
        "tensorflow": tensorflow.autolog,
        "keras": keras.autolog,
        "mxnet.gluon": gluon.autolog,
        "xgboost": xgboost.autolog,
        "lightgbm": lightgbm.autolog,
        "sklearn": sklearn.autolog,
        "fastai": fastai.autolog,
        "pyspark": spark.autolog,
    }

    def setup_autologging(module):
        autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
        try:
            needed_params = list(inspect.signature(autolog_fn).parameters.keys())
            filtered = {k: v for k, v in locals_copy if k in needed_params}
        except ValueError:
            filtered = {}
        try:
            autolog_fn(**filtered)
            _logger.info("Autologging successfully enabled for %s.", module.__name__)
        except Exception as e:  # pylint: disable=broad-except
            _logger.warning(
                "Exception raised while enabling autologging for %s: %s", module.__name__, str(e)
            )

    # for each autolog library (except pyspark), register a post-import hook.
    # this way, we do not send any errors to the user until we know they are using the library.
    # the post-import hook also retroactively activates for previously-imported libraries.
    for module in list(set(LIBRARY_TO_AUTOLOG_FN.keys()) - set(["pyspark"])):
        register_post_import_hook(setup_autologging, module, overwrite=True)

    # for pyspark, we activate autologging immediately, without waiting for a module import.
    # this is because on Databricks a SparkSession already exists and the user can directly
    # interact with it, and this activity should be logged.
    try:
        spark.autolog()
    except ImportError as ie:
        # if pyspark isn't installed, a user could potentially install it in the middle
        # of their session so we want to enable autologging once they do
        if "pyspark" in str(ie):
            register_post_import_hook(setup_autologging, "pyspark", overwrite=True)
    except Exception as e:  # pylint: disable=broad-except
        _logger.warning("Exception raised while enabling autologging for spark: %s", str(e))
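# ---------------------------------------------------------------------------
# Self-contained illustration (hypothetical function names) of the parameter
# filtering used in setup_autologging above: only the keyword arguments that a
# framework's autolog function actually declares are forwarded to it, so
# integrations that lack a newer configuration flag simply never receive it.
import inspect


def fake_autolog(log_input_example=False):
    print("fake_autolog called with log_input_example =", log_input_example)


available_args = {"log_input_example": True, "log_model_signature": False}
needed_params = list(inspect.signature(fake_autolog).parameters.keys())
filtered = {k: v for k, v in available_args.items() if k in needed_params}
fake_autolog(**filtered)  # "log_model_signature" is dropped; only the supported flag is passed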
def autolog(
    log_input_examples=False,
    log_model_signatures=True,
    log_models=True,
    disable=False,
    exclusive=False,
):  # pylint: disable=unused-argument
    """
    Enables (or disables) and configures autologging for all supported integrations.

    The parameters are passed to any autologging integrations that support them.

    See the :ref:`tracking docs <automatic-logging>` for a list of supported autologging
    integrations.

    :param log_input_examples: If ``True``, input examples from training datasets are collected
                               and logged along with model artifacts during training. If
                               ``False``, input examples are not logged.
                               Note: Input examples are MLflow model attributes
                               and are only collected if ``log_models`` is also ``True``.
    :param log_model_signatures: If ``True``,
                                 :py:class:`ModelSignatures <mlflow.models.ModelSignature>`
                                 describing model inputs and outputs are collected and logged
                                 along with model artifacts during training. If ``False``,
                                 signatures are not logged.
                                 Note: Model signatures are MLflow model attributes
                                 and are only collected if ``log_models`` is also ``True``.
    :param log_models: If ``True``, trained models are logged as MLflow model artifacts.
                       If ``False``, trained models are not logged. Input examples and model
                       signatures, which are attributes of MLflow models, are also omitted when
                       ``log_models`` is ``False``.
    :param disable: If ``True``, disables all supported autologging integrations. If ``False``,
                    enables all supported autologging integrations.
    :param exclusive: If ``True``, autologged content is not logged to user-created fluent runs.
                      If ``False``, autologged content is logged to the active fluent run,
                      which may be user-created.

    .. code-block:: python
        :caption: Example

        import numpy as np
        import mlflow.sklearn
        from mlflow.tracking import MlflowClient
        from sklearn.linear_model import LinearRegression

        def print_auto_logged_info(r):
            tags = {k: v for k, v in r.data.tags.items() if not k.startswith("mlflow.")}
            artifacts = [f.path for f in MlflowClient().list_artifacts(r.info.run_id, "model")]
            print("run_id: {}".format(r.info.run_id))
            print("artifacts: {}".format(artifacts))
            print("params: {}".format(r.data.params))
            print("metrics: {}".format(r.data.metrics))
            print("tags: {}".format(tags))

        # prepare training data
        X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
        y = np.dot(X, np.array([1, 2])) + 3

        # Auto log all the parameters, metrics, and artifacts
        mlflow.autolog()
        model = LinearRegression()
        with mlflow.start_run() as run:
            model.fit(X, y)

        # fetch the auto logged parameters and metrics for ended run
        print_auto_logged_info(mlflow.get_run(run_id=run.info.run_id))

    .. code-block:: text
        :caption: Output

        run_id: fd10a17d028c47399a55ab8741721ef7
        artifacts: ['model/MLmodel', 'model/conda.yaml', 'model/model.pkl']
        params: {'copy_X': 'True', 'normalize': 'False', 'fit_intercept': 'True', 'n_jobs': 'None'}
        metrics: {'training_score': 1.0, 'training_rmse': 4.440892098500626e-16,
                  'training_r2_score': 1.0, 'training_mae': 2.220446049250313e-16,
                  'training_mse': 1.9721522630525295e-31}
        tags: {'estimator_class': 'sklearn.linear_model._base.LinearRegression',
               'estimator_name': 'LinearRegression'}
    """
    locals_copy = locals().items()

    # Mapping of library module name to specific autolog function
    # eg: mxnet.gluon is the actual library, mlflow.gluon.autolog is our autolog function for it
    LIBRARY_TO_AUTOLOG_FN = {
        "tensorflow": tensorflow.autolog,
        "keras": keras.autolog,
        "mxnet.gluon": gluon.autolog,
        "xgboost": xgboost.autolog,
        "lightgbm": lightgbm.autolog,
        "statsmodels": statsmodels.autolog,
        "sklearn": sklearn.autolog,
        "fastai": fastai.autolog,
        "pyspark": spark.autolog,
        # TODO: Broaden this beyond pytorch_lightning as we add autologging support for more
        # Pytorch frameworks under mlflow.pytorch.autolog
        "pytorch_lightning": pytorch.autolog,
    }

    def setup_autologging(module):
        try:
            autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
            try:
                needed_params = list(inspect.signature(autolog_fn).parameters.keys())
                filtered = {k: v for k, v in locals_copy if k in needed_params}
            except Exception:  # pylint: disable=broad-except
                filtered = {}
            autolog_fn(**filtered)
            _logger.info("Autologging successfully enabled for %s.", module.__name__)
        except Exception as e:  # pylint: disable=broad-except
            if _is_testing():
                # Raise unexpected exceptions in test mode in order to detect
                # errors within dependent autologging integrations
                raise
            else:
                _logger.warning(
                    "Exception raised while enabling autologging for %s: %s",
                    module.__name__,
                    str(e),
                )

    # for each autolog library (except pyspark), register a post-import hook.
    # this way, we do not send any errors to the user until we know they are using the library.
    # the post-import hook also retroactively activates for previously-imported libraries.
    for module in list(set(LIBRARY_TO_AUTOLOG_FN.keys()) - set(["pyspark"])):
        register_post_import_hook(setup_autologging, module, overwrite=True)

    # for pyspark, we activate autologging immediately, without waiting for a module import.
    # this is because on Databricks a SparkSession already exists and the user can directly
    # interact with it, and this activity should be logged.
    try:
        spark.autolog()
    except ImportError as ie:
        # if pyspark isn't installed, a user could potentially install it in the middle
        # of their session so we want to enable autologging once they do
        if "pyspark" in str(ie):
            register_post_import_hook(setup_autologging, "pyspark", overwrite=True)
    except Exception as e:  # pylint: disable=broad-except
        if _is_testing():
            # Raise unexpected exceptions in test mode in order to detect
            # errors within dependent autologging integrations
            raise
        else:
            _logger.warning("Exception raised while enabling autologging for spark: %s", str(e))
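# ---------------------------------------------------------------------------
# Hedged usage sketch for the configuration flags documented above; assumes an
# environment with mlflow and scikit-learn installed. Exactly which params,
# metrics, and artifacts get logged depends on the installed mlflow version.
import numpy as np
import mlflow
from sklearn.linear_model import LinearRegression

# Enable autologging but skip model artifacts; input examples and signatures
# are model attributes, so they are omitted as well.
mlflow.autolog(log_models=False)

X = np.array([[1.0, 1.0], [1.0, 2.0], [2.0, 2.0], [2.0, 3.0]])
y = X @ np.array([1.0, 2.0]) + 3.0
with mlflow.start_run():
    LinearRegression().fit(X, y)  # params and metrics are autologged, no model artifact

# Turn every supported autologging integration back off.
mlflow.autolog(disable=True)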
def autolog(log_input_examples=False, log_model_signatures=True):  # pylint: disable=unused-argument
    """
    Enable autologging for all supported integrations.

    The parameters are passed to any autologging integrations that support them.

    See the :ref:`tracking docs <automatic-logging>` for a list of supported autologging
    integrations.

    :param log_input_examples: If ``True``, input examples from training datasets are collected
                               and logged along with model artifacts during training. If
                               ``False``, input examples are not logged.
    :param log_model_signatures: If ``True``,
                                 :py:class:`ModelSignatures <mlflow.models.ModelSignature>`
                                 describing model inputs and outputs are collected and logged
                                 along with model artifacts during training. If ``False``,
                                 signatures are not logged.
    """
    locals_copy = locals().items()

    # Mapping of library module name to specific autolog function
    # eg: mxnet.gluon is the actual library, mlflow.gluon.autolog is our autolog function for it
    LIBRARY_TO_AUTOLOG_FN = {
        "tensorflow": tensorflow.autolog,
        "keras": keras.autolog,
        "mxnet.gluon": gluon.autolog,
        "xgboost": xgboost.autolog,
        "lightgbm": lightgbm.autolog,
        "sklearn": sklearn.autolog,
        "fastai": fastai.autolog,
        "pyspark": spark.autolog,
        # TODO: Broaden this beyond pytorch_lightning as we add autologging support for more
        # Pytorch frameworks under mlflow.pytorch.autolog
        "pytorch_lightning": pytorch.autolog,
    }

    def setup_autologging(module):
        autolog_fn = LIBRARY_TO_AUTOLOG_FN[module.__name__]
        try:
            needed_params = list(inspect.signature(autolog_fn).parameters.keys())
            filtered = {k: v for k, v in locals_copy if k in needed_params}
        except ValueError:
            filtered = {}
        try:
            autolog_fn(**filtered)
            _logger.info("Autologging successfully enabled for %s.", module.__name__)
        except Exception as e:  # pylint: disable=broad-except
            _logger.warning(
                "Exception raised while enabling autologging for %s: %s", module.__name__, str(e)
            )

    # for each autolog library (except pyspark), register a post-import hook.
    # this way, we do not send any errors to the user until we know they are using the library.
    # the post-import hook also retroactively activates for previously-imported libraries.
    for module in list(set(LIBRARY_TO_AUTOLOG_FN.keys()) - set(["pyspark"])):
        register_post_import_hook(setup_autologging, module, overwrite=True)

    # for pyspark, we activate autologging immediately, without waiting for a module import.
    # this is because on Databricks a SparkSession already exists and the user can directly
    # interact with it, and this activity should be logged.
    try:
        spark.autolog()
    except ImportError as ie:
        # if pyspark isn't installed, a user could potentially install it in the middle
        # of their session so we want to enable autologging once they do
        if "pyspark" in str(ie):
            register_post_import_hook(setup_autologging, "pyspark", overwrite=True)
    except Exception as e:  # pylint: disable=broad-except
        _logger.warning("Exception raised while enabling autologging for spark: %s", str(e))
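# ---------------------------------------------------------------------------
# Conceptual sketch (not the real register_post_import_hook) of the behaviour the
# code above depends on: the callback fires immediately for modules that are
# already imported, and otherwise after the library's first import. Only the
# "already imported" path is shown here to keep the example self-contained.
import sys


def register_post_import_hook_sketch(hook, module_name):
    module = sys.modules.get(module_name)
    if module is not None:
        # retroactive activation for previously-imported libraries
        hook(module)
    else:
        # a real implementation intercepts the import machinery (e.g. with a
        # meta path finder) and invokes `hook` once the module is imported
        pass


register_post_import_hook_sketch(lambda m: print("hook fired for", m.__name__), "sys")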