import logging

from mlflow.utils.autologging_utils import (
    _get_new_training_session_class,
    autologging_integration,
    safe_patch,
)
from mlflow.utils.autologging_utils import get_method_call_arg_value
from mlflow.utils.file_utils import TempDir
from mlflow.utils.mlflow_tags import MLFLOW_AUTOLOGGING, MLFLOW_PARENT_RUN_ID
from mlflow.utils.validation import (
    MAX_PARAMS_TAGS_PER_BATCH,
    MAX_PARAM_VAL_LENGTH,
    MAX_ENTITY_KEY_LENGTH,
)

_logger = logging.getLogger(__name__)

_SparkTrainingSession = _get_new_training_session_class()

AUTOLOGGING_INTEGRATION_NAME = "pyspark.ml"


def _read_log_model_allowlist_from_file(allowlist_file):
    """Read a log-model allowlist file and return the set of its non-blank,
    non-comment lines, stripped of surrounding whitespace."""
    allowlist = set()
    with open(allowlist_file) as f:
        for line in f:
            stripped = line.strip()
            is_blankline_or_comment = stripped == "" or stripped.startswith("#")
            if not is_blankline_or_comment:
                allowlist.add(stripped)
    return allowlist
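# A minimal usage sketch for the reader above (the path "/tmp/allowlist.txt"
# and its contents are hypothetical, not part of MLflow):
#
#   with open("/tmp/allowlist.txt", "w") as f:
#       f.write("# a comment line\n")
#       f.write("\n")
#       f.write("pyspark.ml.classification.LogisticRegressionModel\n")
#   _read_log_model_allowlist_from_file("/tmp/allowlist.txt")
#   # -> {"pyspark.ml.classification.LogisticRegressionModel"}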
from mlflow.utils.autologging_utils import (
    resolve_input_example_and_signature,
    _get_new_training_session_class,
)
from mlflow.tracking._model_registry import DEFAULT_AWAIT_MAX_SLEEP_SECONDS

FLAVOR_NAME = "sklearn"

SERIALIZATION_FORMAT_PICKLE = "pickle"
SERIALIZATION_FORMAT_CLOUDPICKLE = "cloudpickle"

SUPPORTED_SERIALIZATION_FORMATS = [
    SERIALIZATION_FORMAT_PICKLE,
    SERIALIZATION_FORMAT_CLOUDPICKLE,
]

_logger = logging.getLogger(__name__)

_SklearnTrainingSession = _get_new_training_session_class()


def get_default_conda_env(include_cloudpickle=False):
    """
    :return: The default Conda environment for MLflow Models produced by calls to
             :func:`save_model()` and :func:`log_model()`.
    """
    import sklearn

    pip_deps = ["scikit-learn=={}".format(sklearn.__version__)]
    if include_cloudpickle:
        import cloudpickle

        pip_deps += ["cloudpickle=={}".format(cloudpickle.__version__)]
    return _mlflow_conda_env(additional_pip_deps=pip_deps, additional_conda_channels=None)
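# A minimal usage sketch (assumes scikit-learn, and optionally cloudpickle,
# are importable; the pinned versions in the result depend on the environment):
#
#   env = get_default_conda_env(include_cloudpickle=True)
#   # env is a conda environment dict whose pip dependencies pin the installed
#   # scikit-learn and cloudpickle versions, e.g. "scikit-learn==1.3.2".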