def test_fetch_project(local_git_repo, local_git_repo_uri):
    """Exercise ``mlflow.projects._fetch_project`` on local and Git project URIs.

    Part one compares the fetched working directory against an expected
    directory for four kinds of input:

    1. a locally saved project,
    2. a project located in a Git repo root directory,
    3. a project located in a Git repo subdirectory,
    4. a local directory addressed through a subdirectory.

    Part two checks which destination directory is used for different
    ``force_tempdir`` / URI combinations.
    """
    fetch = mlflow.projects._fetch_project

    local_parent = os.path.dirname(TEST_PROJECT_DIR)
    local_name = os.path.basename(TEST_PROJECT_DIR)
    git_subproject = os.path.join(local_git_repo, 'example_project')

    # Each case is (base URI, subdirectory, directory the result must equal).
    cases = (
        (TEST_PROJECT_DIR, '', TEST_PROJECT_DIR),
        (local_git_repo_uri, '', local_git_repo),
        (local_git_repo_uri, 'example_project', git_subproject),
        (local_parent, local_name, TEST_PROJECT_DIR),
    )
    for case in cases:
        base_uri, subdirectory, expected = case
        project_uri = _build_uri(base_uri, subdirectory)
        work_dir = fetch(uri=project_uri, force_tempdir=False)
        assert_dirs_equal(expected=expected, actual=work_dir)

    # Test that we correctly determine the dest directory to use when fetching a project.
    # These two combinations are expected to land under the system temp directory.
    tempdir_cases = ((True, TEST_PROJECT_DIR), (False, GIT_PROJECT_URI))
    for force_tempdir, uri in tempdir_cases:
        dest_dir = fetch(uri=uri, force_tempdir=force_tempdir)
        shared_prefix = os.path.commonprefix([dest_dir, tempfile.gettempdir()])
        assert shared_prefix == tempfile.gettempdir()
        assert os.path.exists(dest_dir)

    # A local project fetched without forcing a temp dir is expected to resolve
    # to its own absolute path.
    for force_tempdir in (None, False):
        fetched = fetch(uri=TEST_PROJECT_DIR, force_tempdir=force_tempdir)
        assert fetched == os.path.abspath(TEST_PROJECT_DIR)
def test_fetch_project(local_git_repo, local_git_repo_uri, zipped_repo, httpserver):
    """Exercise ``mlflow.projects._fetch_project`` on local, Git and ZIP project URIs.

    The bytes of the file at ``zipped_repo`` are handed to ``httpserver`` so
    that the remote-ZIP case can be fetched over HTTP.

    The first loop compares the fetched working directory against an expected
    directory; the remaining loops check which destination directory is used
    for different ``force_tempdir`` / URI combinations.
    """
    # Read the archive inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(zipped_repo, 'rb') as zip_file:
        httpserver.serve_content(zip_file.read())

    # The tests are as follows:
    # 1. Fetching a locally saved project.
    # 2. Fetching a project located in a Git repo root directory.
    # 3. Fetching a project located in a Git repo subdirectory.
    # 4. Passing a subdirectory works for local directories.
    # 5. Fetching a remote ZIP file
    # 6. Using a local ZIP file
    # 7. Using a file:// URL to a local ZIP file
    test_list = [
        (TEST_PROJECT_DIR, '', TEST_PROJECT_DIR),
        (local_git_repo_uri, '', local_git_repo),
        (local_git_repo_uri, 'example_project',
         os.path.join(local_git_repo, 'example_project')),
        (os.path.dirname(TEST_PROJECT_DIR), os.path.basename(TEST_PROJECT_DIR),
         TEST_PROJECT_DIR),
        (httpserver.url + '/%s.zip' % TEST_PROJECT_NAME, '', TEST_PROJECT_DIR),
        (zipped_repo, '', TEST_PROJECT_DIR),
        ('file://%s' % zipped_repo, '', TEST_PROJECT_DIR),
    ]
    for base_uri, subdirectory, expected in test_list:
        work_dir = mlflow.projects._fetch_project(
            uri=_build_uri(base_uri, subdirectory), force_tempdir=False)
        assert_dirs_equal(expected=expected, actual=work_dir)

    # Test that we correctly determine the dest directory to use when fetching a project.
    # These two combinations are expected to land under the system temp directory.
    for force_tempdir, uri in [(True, TEST_PROJECT_DIR), (False, GIT_PROJECT_URI)]:
        dest_dir = mlflow.projects._fetch_project(uri=uri, force_tempdir=force_tempdir)
        assert os.path.commonprefix([dest_dir, tempfile.gettempdir()]) == tempfile.gettempdir()
        assert os.path.exists(dest_dir)

    # A local project fetched without forcing a temp dir is expected to resolve
    # to its own absolute path.
    for force_tempdir, uri in [(None, TEST_PROJECT_DIR), (False, TEST_PROJECT_DIR)]:
        assert mlflow.projects._fetch_project(uri=uri, force_tempdir=force_tempdir) == \
            os.path.abspath(TEST_PROJECT_DIR)
def test_dont_remove_mlruns(tmpdir):
    """Fetching a directory that holds an "mlruns" folder keeps that folder.

    A source directory with an ``mlruns`` subfolder and an ``MLproject`` file
    is created under ``tmpdir``, fetched with ``_fetch_project``, and the
    fetched directory is compared against the source.
    """
    source = tmpdir.mkdir("mlruns-src-dir")

    # Populate the "mlruns" folder with a single file.
    runs_folder = source.mkdir("mlruns")
    marker_file = runs_folder.join("some-file.txt")
    marker_file.write("hi")

    # Give the directory an MLproject file.
    project_file = source.join("MLproject")
    project_file.write("dummy MLproject contents")

    source_path = source.strpath
    fetched_path = _fetch_project(uri=source_path, version=None)
    assert_dirs_equal(expected=source_path, actual=fetched_path)
def test__fetch_project(local_git_repo, local_git_repo_uri, zipped_repo, httpserver):
    """Exercise ``_fetch_project`` on local, Git and ZIP project URIs.

    The bytes of the file at ``zipped_repo`` are handed to ``httpserver`` so
    that the remote-ZIP case can be fetched over HTTP.

    The loop compares the fetched working directory against an expected
    directory; the trailing assertions check which destination directory is
    used for a local project versus a Git project.
    """
    # Read the archive inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(zipped_repo, "rb") as zip_file:
        httpserver.serve_content(zip_file.read())

    # The tests are as follows:
    # 1. Fetching a locally saved project.
    # 2. Fetching a project located in a Git repo root directory.
    # 3. Fetching a project located in a Git repo subdirectory.
    # 4. Passing a subdirectory works for local directories.
    # 5. Fetching a remote ZIP file
    # 6. Using a local ZIP file
    # 7. Using a file:// URL to a local ZIP file
    test_list = [
        (TEST_PROJECT_DIR, "", TEST_PROJECT_DIR),
        (local_git_repo_uri, "", local_git_repo),
        (local_git_repo_uri, "example_project",
         os.path.join(local_git_repo, "example_project")),
        (os.path.dirname(TEST_PROJECT_DIR), os.path.basename(TEST_PROJECT_DIR),
         TEST_PROJECT_DIR),
        (httpserver.url + "/%s.zip" % TEST_PROJECT_NAME, "", TEST_PROJECT_DIR),
        (zipped_repo, "", TEST_PROJECT_DIR),
        ("file://%s" % zipped_repo, "", TEST_PROJECT_DIR),
    ]
    for base_uri, subdirectory, expected in test_list:
        work_dir = _fetch_project(uri=_build_uri(base_uri, subdirectory))
        assert_dirs_equal(expected=expected, actual=work_dir)

    # Test that we correctly determine the dest directory to use when fetching a project.
    # A local project is expected to resolve to its own absolute path.
    fetched_local_project = _fetch_project(uri=TEST_PROJECT_DIR)
    assert os.path.abspath(fetched_local_project) == os.path.abspath(TEST_PROJECT_DIR)

    # A Git project is expected to land somewhere under the system temp directory.
    fetched_git_project = _fetch_project(GIT_PROJECT_URI)
    assert (
        os.path.commonprefix([fetched_git_project, tempfile.gettempdir()])
        == tempfile.gettempdir()
    )
    assert os.path.exists(fetched_git_project)
def test_fetch_and_clean_project(tmpdir):
    """Check ``databricks._fetch_and_clean_project`` on projects with and without "mlruns".

    Two project directories are created under ``tmpdir``; they differ only in
    that one contains an ``mlruns`` folder. After fetching both, the results
    must compare equal and each must still hold the original ``MLproject``
    contents.
    """
    # Source project that carries an "mlruns" folder holding one file.
    with_runs = tmpdir.mkdir("with-mlruns")
    runs_folder = with_runs.mkdir("mlruns")
    runs_folder.join("some-file").write("hi")

    # Source project with no "mlruns" folder.
    without_runs = tmpdir.mkdir("without-mlruns")

    # Both projects get the same MLproject file.
    for source in (with_runs, without_runs):
        project_file = source.join("MLproject")
        project_file.write("Hello")

    fetched_with = databricks._fetch_and_clean_project(with_runs.strpath)
    fetched_without = databricks._fetch_and_clean_project(without_runs.strpath)

    # The fetched copies must match each other even though only one source had
    # an "mlruns" folder (presumably it is removed during the fetch - confirm
    # against _fetch_and_clean_project).
    assert_dirs_equal(fetched_with, fetched_without)

    for fetched in (fetched_with, fetched_without):
        project_path = os.path.join(fetched, "MLproject")
        with open(project_path) as handle:
            contents = handle.read()
            assert contents == "Hello"