Example #1
from unittest import mock

from mlflow.utils import databricks_utils


def test_get_repl_id():
    # Outside of Databricks environments, the Databricks REPL ID should be absent
    assert databricks_utils.get_repl_id() is None

    # When dbutils is available, the REPL ID comes from its entry point.
    mock_dbutils = mock.MagicMock()
    mock_dbutils.entry_point.getReplId.return_value = "testReplId1"
    with mock.patch("mlflow.utils.databricks_utils._get_dbutils", return_value=mock_dbutils):
        assert databricks_utils.get_repl_id() == "testReplId1"

    # Without dbutils, get_repl_id() falls back to a Spark context local property.
    mock_sparkcontext_inst = mock.MagicMock()
    mock_sparkcontext_inst.getLocalProperty.return_value = "testReplId2"
    mock_sparkcontext_class = mock.MagicMock()
    mock_sparkcontext_class.getOrCreate.return_value = mock_sparkcontext_inst
    mock_spark = mock.MagicMock()
    mock_spark.SparkContext = mock_sparkcontext_class

    # Patch the import machinery so that "import pyspark" resolves to the mock.
    import builtins

    original_import = builtins.__import__

    def mock_import(name, *args, **kwargs):
        if name == "pyspark":
            return mock_spark
        else:
            return original_import(name, *args, **kwargs)

    with mock.patch("builtins.__import__", side_effect=mock_import):
        assert databricks_utils.get_repl_id() == "testReplId2"
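
The test above exercises a three-step fallback: the dbutils entry point first, then the Spark context's local properties, and finally None. The sketch below illustrates that lookup order only; it is not MLflow's actual implementation, and both the _get_dbutils stub and the "spark.databricks.replId" property key are assumptions for illustration (the test mocks only the return values).

def _get_dbutils():
    # Stand-in for mlflow.utils.databricks_utils._get_dbutils, which the test
    # patches; outside Databricks it raises because dbutils is unavailable.
    raise RuntimeError("dbutils is only available inside Databricks")


def get_repl_id_sketch():
    # 1. Prefer the REPL ID exposed by the dbutils entry point.
    try:
        repl_id = _get_dbutils().entry_point.getReplId()
        if repl_id is not None:
            return repl_id
    except Exception:
        pass

    # 2. Fall back to a Spark context local property. The property key is an
    #    assumption; the test only mocks getLocalProperty's return value.
    try:
        import pyspark

        repl_id = pyspark.SparkContext.getOrCreate().getLocalProperty(
            "spark.databricks.replId"
        )
        if repl_id is not None:
            return repl_id
    except Exception:
        pass

    # 3. Outside Databricks, there is no REPL ID.
    return None
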
Example #2
import atexit
import os
import shutil
import tempfile


def get_or_create_nfs_tmp_dir():
    """
    Get or create a temporary NFS directory that is removed once the Python process exits.
    """
    from mlflow.utils.databricks_utils import is_in_databricks_runtime, get_repl_id
    from mlflow.utils.nfs_on_spark import get_nfs_cache_root_dir

    nfs_root_dir = get_nfs_cache_root_dir()

    if is_in_databricks_runtime() and get_repl_id() is not None:
        # Note: in Databricks, atexit hooks do not work.
        # The {nfs_root_dir}/repl_tmp_data/{repl_id} directory is removed automatically
        # once the Databricks notebook detaches.
        # The repl_tmp_data directory is designed to be shared by all kinds of
        # applications, so create a child directory "mlflow" for MLflow temp data.
        tmp_nfs_dir = os.path.join(nfs_root_dir, "repl_tmp_data", get_repl_id(), "mlflow")
        os.makedirs(tmp_nfs_dir, exist_ok=True)
    else:
        tmp_nfs_dir = tempfile.mkdtemp(dir=nfs_root_dir)
        # mkdtemp creates the directory with permissions 0o700;
        # change them to 0o777 so the directory is visible to Spark UDFs.
        os.chmod(tmp_nfs_dir, 0o777)
        atexit.register(shutil.rmtree, tmp_nfs_dir, ignore_errors=True)

    return tmp_nfs_dir
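
A brief usage sketch of the helper above; the file name and payload bytes are hypothetical. The point is that the returned directory lives on NFS (and, outside Databricks, is chmod'd to 0o777), so a path created on the driver is readable from Spark executors.

import os

# Hypothetical usage: stage a file in the shared NFS temp directory so that
# Spark UDF workers on other nodes can read it via the same path.
tmp_dir = get_or_create_nfs_tmp_dir()
shared_path = os.path.join(tmp_dir, "payload.bin")  # illustrative name
with open(shared_path, "wb") as f:
    f.write(b"example payload")  # placeholder content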