Ejemplo n.º 1
0
def test_fetch_project(local_git_repo, local_git_repo_uri):
    """Exercise ``mlflow.projects._fetch_project`` on local and Git project URIs.

    Three groups of checks are made:

    * the fetched working directory matches the expected project contents,
    * fetches that should go to a temporary location end up under the system
      temp directory and exist on disk,
    * a local project fetched without forcing a temp directory is returned as
      the absolute path of the project itself.
    """
    project_parent = os.path.dirname(TEST_PROJECT_DIR)
    project_name = os.path.basename(TEST_PROJECT_DIR)
    git_subproject_dir = os.path.join(local_git_repo, 'example_project')

    # Each case is (base URI, subdirectory, directory the fetch must match):
    # 1. Fetching a locally saved project.
    # 2. Fetching a project located in a Git repo root directory.
    # 3. Fetching a project located in a Git repo subdirectory.
    # 4. Passing a subdirectory works for local directories.
    cases = (
        (TEST_PROJECT_DIR, '', TEST_PROJECT_DIR),
        (local_git_repo_uri, '', local_git_repo),
        (local_git_repo_uri, 'example_project', git_subproject_dir),
        (project_parent, project_name, TEST_PROJECT_DIR),
    )
    for source_uri, subdir, expected_dir in cases:
        project_uri = _build_uri(source_uri, subdir)
        fetched_dir = mlflow.projects._fetch_project(uri=project_uri, force_tempdir=False)
        assert_dirs_equal(expected=expected_dir, actual=fetched_dir)

    # Test that we correctly determine the dest directory to use when fetching a project:
    # a forced-tempdir local fetch and a Git fetch must both land under the temp dir.
    tempdir_cases = ((True, TEST_PROJECT_DIR), (False, GIT_PROJECT_URI))
    for use_tempdir, project_uri in tempdir_cases:
        fetched_dir = mlflow.projects._fetch_project(uri=project_uri, force_tempdir=use_tempdir)
        temp_root = tempfile.gettempdir()
        # commonprefix compares character-wise, so this is a string-prefix check.
        shared_prefix = os.path.commonprefix([fetched_dir, temp_root])
        assert shared_prefix == temp_root
        assert os.path.exists(fetched_dir)

    # A local project fetched with force_tempdir unset or False is used in place.
    for use_tempdir in (None, False):
        in_place_dir = mlflow.projects._fetch_project(
            uri=TEST_PROJECT_DIR, force_tempdir=use_tempdir)
        assert in_place_dir == os.path.abspath(TEST_PROJECT_DIR)
Ejemplo n.º 2
0
def test_fetch_project(local_git_repo, local_git_repo_uri, zipped_repo, httpserver):
    """Exercise ``mlflow.projects._fetch_project`` on local, Git and ZIP project URIs.

    The zipped project is served through the ``httpserver`` fixture so that a
    remote ZIP URI can be fetched alongside local paths and Git URIs. The test
    then checks that fetched contents match the expected project directory and
    that the destination directory is chosen as expected for each
    ``force_tempdir`` value.
    """
    # Read the archive inside a context manager so the file handle is closed
    # deterministically instead of being left for the garbage collector.
    with open(zipped_repo, 'rb') as zip_file:
        httpserver.serve_content(zip_file.read())
    # The tests are as follows:
    # 1. Fetching a locally saved project.
    # 2. Fetching a project located in a Git repo root directory.
    # 3. Fetching a project located in a Git repo subdirectory.
    # 4. Passing a subdirectory works for local directories.
    # 5. Fetching a remote ZIP file
    # 6. Using a local ZIP file
    # 7. Using a file:// URL to a local ZIP file
    test_list = [
        (TEST_PROJECT_DIR, '', TEST_PROJECT_DIR),
        (local_git_repo_uri, '', local_git_repo),
        (local_git_repo_uri, 'example_project', os.path.join(local_git_repo, 'example_project')),
        (os.path.dirname(TEST_PROJECT_DIR), os.path.basename(TEST_PROJECT_DIR), TEST_PROJECT_DIR),
        (httpserver.url + '/%s.zip' % TEST_PROJECT_NAME, '', TEST_PROJECT_DIR),
        (zipped_repo, '', TEST_PROJECT_DIR),
        ('file://%s' % zipped_repo, '', TEST_PROJECT_DIR),
    ]
    for base_uri, subdirectory, expected in test_list:
        work_dir = mlflow.projects._fetch_project(
            uri=_build_uri(base_uri, subdirectory), force_tempdir=False)
        assert_dirs_equal(expected=expected, actual=work_dir)
    # Test that we correctly determine the dest directory to use when fetching a project.
    # A forced-tempdir local fetch and a Git fetch must both land under the temp directory.
    for force_tempdir, uri in [(True, TEST_PROJECT_DIR), (False, GIT_PROJECT_URI)]:
        dest_dir = mlflow.projects._fetch_project(uri=uri, force_tempdir=force_tempdir)
        # commonprefix compares character-wise, so this is a string-prefix check.
        assert os.path.commonprefix([dest_dir, tempfile.gettempdir()]) == tempfile.gettempdir()
        assert os.path.exists(dest_dir)
    # A local project fetched with force_tempdir unset or False is used in place.
    for force_tempdir, uri in [(None, TEST_PROJECT_DIR), (False, TEST_PROJECT_DIR)]:
        assert mlflow.projects._fetch_project(uri=uri, force_tempdir=force_tempdir) == \
               os.path.abspath(TEST_PROJECT_DIR)
Ejemplo n.º 3
0
def test_dont_remove_mlruns(tmpdir):
    """Fetching a directory containing an "mlruns" folder doesn't remove that folder.

    A throwaway project is built under ``tmpdir`` with an ``mlruns`` subdirectory
    and an ``MLproject`` file; the fetched directory must compare equal to it.
    """
    project_dir = tmpdir.mkdir("mlruns-src-dir")
    mlruns_dir = project_dir.mkdir("mlruns")
    mlruns_dir.join("some-file.txt").write("hi")
    project_dir.join("MLproject").write("dummy MLproject contents")

    source_path = project_dir.strpath
    fetched_path = _fetch_project(uri=source_path, version=None)
    assert_dirs_equal(expected=source_path, actual=fetched_path)
Ejemplo n.º 4
0
def test__fetch_project(local_git_repo, local_git_repo_uri, zipped_repo,
                        httpserver):
    """Exercise ``_fetch_project`` on local, Git and ZIP project URIs.

    The zipped project is served through the ``httpserver`` fixture so that a
    remote ZIP URI can be fetched alongside local paths and Git URIs. The test
    then checks that fetched contents match the expected project directory,
    that a local project resolves to its own absolute path, and that a Git
    project is fetched into an existing directory under the system temp dir.
    """
    # Read the archive inside a context manager so the file handle is closed
    # deterministically instead of being left for the garbage collector.
    with open(zipped_repo, "rb") as zip_file:
        httpserver.serve_content(zip_file.read())
    # The tests are as follows:
    # 1. Fetching a locally saved project.
    # 2. Fetching a project located in a Git repo root directory.
    # 3. Fetching a project located in a Git repo subdirectory.
    # 4. Passing a subdirectory works for local directories.
    # 5. Fetching a remote ZIP file
    # 6. Using a local ZIP file
    # 7. Using a file:// URL to a local ZIP file
    test_list = [
        (TEST_PROJECT_DIR, "", TEST_PROJECT_DIR),
        (local_git_repo_uri, "", local_git_repo),
        (local_git_repo_uri, "example_project",
         os.path.join(local_git_repo, "example_project")),
        (os.path.dirname(TEST_PROJECT_DIR), os.path.basename(TEST_PROJECT_DIR),
         TEST_PROJECT_DIR),
        (httpserver.url + "/%s.zip" % TEST_PROJECT_NAME, "", TEST_PROJECT_DIR),
        (zipped_repo, "", TEST_PROJECT_DIR),
        ("file://%s" % zipped_repo, "", TEST_PROJECT_DIR),
    ]
    for base_uri, subdirectory, expected in test_list:
        work_dir = _fetch_project(uri=_build_uri(base_uri, subdirectory))
        assert_dirs_equal(expected=expected, actual=work_dir)
    # Test that we correctly determine the dest directory to use when fetching a project.
    # A local project must resolve to its own absolute path.
    fetched_local_project = _fetch_project(uri=TEST_PROJECT_DIR)
    assert os.path.abspath(fetched_local_project) == os.path.abspath(
        TEST_PROJECT_DIR)
    # A Git project must land under the temp directory; commonprefix compares
    # character-wise, so this is a string-prefix check.
    fetched_git_project = _fetch_project(GIT_PROJECT_URI)
    assert (os.path.commonprefix([fetched_git_project,
                                  tempfile.gettempdir()
                                  ]) == tempfile.gettempdir())
    assert os.path.exists(fetched_git_project)
Ejemplo n.º 5
0
def test_fetch_and_clean_project(tmpdir):
    """Check ``databricks._fetch_and_clean_project`` on projects with and without "mlruns".

    Two projects are created under ``tmpdir`` that differ only in that one has
    an ``mlruns`` subdirectory. Their fetched copies must compare equal to each
    other, and each must still contain the original ``MLproject`` contents.
    """
    # NOTE(review): equality of the two fetched copies presumably means the
    # "mlruns" directory is dropped during the fetch; confirm against the helper.
    with_mlruns = tmpdir.mkdir("with-mlruns")
    without_mlruns = tmpdir.mkdir("without-mlruns")
    with_mlruns.mkdir("mlruns").join("some-file").write("hi")
    with_mlruns.join("MLproject").write("Hello")
    without_mlruns.join("MLproject").write("Hello")

    fetched_dirs = [
        databricks._fetch_and_clean_project(source.strpath)
        for source in (with_mlruns, without_mlruns)
    ]
    assert_dirs_equal(fetched_dirs[0], fetched_dirs[1])

    for fetched in fetched_dirs:
        mlproject_path = os.path.join(fetched, "MLproject")
        with open(mlproject_path) as mlproject_file:
            contents = mlproject_file.read()
        assert contents == "Hello"