def test_get_repl_id():
    """Check each source that ``databricks_utils.get_repl_id`` is expected to consult."""
    # With nothing patched (i.e. outside Databricks) there is no REPL ID to report.
    assert databricks_utils.get_repl_id() is None

    # Case 1: the REPL ID is served by dbutils' entry point.
    dbutils_stub = mock.MagicMock()
    dbutils_stub.entry_point.getReplId.return_value = "testReplId1"
    with mock.patch("mlflow.utils.databricks_utils._get_dbutils", return_value=dbutils_stub):
        assert databricks_utils.get_repl_id() == "testReplId1"

    # Case 2: the REPL ID is served by a local property of the SparkContext
    # returned from ``pyspark.SparkContext.getOrCreate()``.
    pyspark_stub = mock.MagicMock()
    spark_context_stub = pyspark_stub.SparkContext.getOrCreate.return_value
    spark_context_stub.getLocalProperty.return_value = "testReplId2"

    import builtins

    real_import = builtins.__import__

    def fake_import(name, *args, **kwargs):
        # Only ``pyspark`` is intercepted; every other import goes through untouched.
        if name == "pyspark":
            return pyspark_stub
        return real_import(name, *args, **kwargs)

    with mock.patch("builtins.__import__", side_effect=fake_import):
        assert databricks_utils.get_repl_id() == "testReplId2"
def get_or_create_nfs_tmp_dir():
    """
    Get or create a temporary NFS directory which will be removed once the Python process
    exits.

    When running in a Databricks runtime with a REPL ID available, the directory is
    ``{nfs_root_dir}/repl_tmp_data/{repl_id}/mlflow``. Otherwise a new directory is created
    under the NFS cache root with ``tempfile.mkdtemp``, its permissions are set to ``0o777``,
    and its removal is registered with ``atexit``.

    :return: Path of the temporary NFS directory.
    """
    from mlflow.utils.databricks_utils import is_in_databricks_runtime, get_repl_id
    from mlflow.utils.nfs_on_spark import get_nfs_cache_root_dir

    nfs_root_dir = get_nfs_cache_root_dir()

    # Look the REPL ID up once so the value that is checked is the same value used to build
    # the path. The ID is only queried when running inside a Databricks runtime.
    repl_id = get_repl_id() if is_in_databricks_runtime() else None

    if repl_id is not None:
        # Note: In databricks, atexit hook does not work.
        # The {nfs_root_dir}/repl_tmp_data/{repl_id} directory will be removed once databricks
        # notebook detaches.
        # The repl_tmp_data directory is designed to be used by all kinds of applications,
        # so create a child directory "mlflow" for storing mlflow temp data.
        tmp_nfs_dir = os.path.join(nfs_root_dir, "repl_tmp_data", repl_id, "mlflow")
        os.makedirs(tmp_nfs_dir, exist_ok=True)
    else:
        tmp_nfs_dir = tempfile.mkdtemp(dir=nfs_root_dir)
        # mkdtemp creates a directory with permission 0o700
        # change it to be 0o777 to ensure it can be seen in spark UDF
        os.chmod(tmp_nfs_dir, 0o777)
        # Best-effort cleanup at interpreter exit; errors during removal are ignored.
        atexit.register(shutil.rmtree, tmp_nfs_dir, ignore_errors=True)

    return tmp_nfs_dir