def test_hadoop_fs_checksum(mocker):
    """Verify ``_hadoop_fs_checksum`` against a mocked ``hadoop`` process.

    ``subprocess.Popen`` is patched so that no real command is executed.
    The fake process reports success and emits a single tab-separated
    line; the helper is expected to return ``"123456789"``, the final
    field of that line.
    """
    stdout = b"/path/to/file\tMD5-of-0MD5-of-512CRC32C\t123456789"
    stderr = b""

    fake_proc = mocker.Mock()
    fake_proc.configure_mock(
        **{"communicate.return_value": (stdout, stderr), "returncode": 0}
    )
    popen_patch = mocker.patch("subprocess.Popen", return_value=fake_proc)

    checksum = _hadoop_fs_checksum(
        URLInfo("hdfs://example.com:1234/path/to/file")
    )
    assert checksum == "123456789"

    # Several of the expected Popen arguments differ between Windows
    # ("nt") and every other platform.
    not_windows = os.name != "nt"
    popen_patch.assert_called_once_with(
        "hadoop fs -checksum hdfs://example.com:1234/path/to/file",
        shell=True,
        close_fds=not_windows,
        executable=os.getenv("SHELL") if not_windows else None,
        env=os.environ,
        stdin=-1,
        stdout=-1,
        stderr=-1,
    )
    assert fake_proc.communicate.called
Beispiel #2
0
def test_ssh_dir_out(dvc_repo):
    """Run and reproduce a stage whose output directory is on an SSH remote.

    Skipped when ``_should_test_ssh()`` reports that SSH tests cannot run.
    """
    if not _should_test_ssh():
        pytest.skip()

    # Register the SSH remote, then an SSH-backed cache pointing at it.
    remote_url = get_ssh_url()
    assert main(["remote", "add", "upstream", remote_url]) == 0

    cache_url = get_ssh_url()
    assert main(["remote", "add", "sshcache", cache_url]) == 0
    assert main(["config", "cache.ssh", "sshcache"]) == 0

    # A fresh repo object is needed so the config written above is reread.
    repo = DvcRepo(dvc_repo.root_dir)

    remote = URLInfo(remote_url)
    shell_steps = "mkdir dir-out;cd dir-out;echo 1 > 1.txt; echo 2 > 2.txt"
    ssh_cmd = "ssh {netloc} 'cd {path};{cmd}'".format(
        netloc=remote.netloc, path=remote.path, cmd=shell_steps
    )
    out_url = (remote / "dir-out").url

    # The fake dependency keeps the stage from being treated as a callback.
    repo.run(cmd=ssh_cmd, outs=[out_url], deps=["foo"])

    repo.reproduce("dir-out.dvc")
    repo.reproduce("dir-out.dvc", force=True)
Beispiel #3
0
def test_get_url_external(repo_dir, dvc_repo, erepo, remote_url):
    """Check ``api.get_url`` for a file tracked in an external repo."""
    _set_remote_url_and_commit(erepo.dvc, remote_url)

    object_url = URLInfo(remote_url) / "ac/bd18db4cc2f85cedef654fccc4a4d8"

    # Passing a file:// URL forces the external repo to be cloned into a
    # temporary directory.
    actual = api.get_url(repo_dir.FOO, repo="file://" + erepo.dvc.root_dir)
    assert actual == object_url
Beispiel #4
0
def test_init_git(tmp_dir, scm, cloud):
    """Check that constructing an ``SSHExecutor`` populates the remote repo.

    A commit is recorded under ``EXEC_HEAD`` and a stash entry under
    ``EXEC_MERGE``; after the executor is built, the committed paths must
    exist under the executor's repo directory on the SSH filesystem.
    """
    tmp_dir.scm_gen({"foo": "foo", "dir": {"bar": "bar"}}, commit="init")
    head_rev = scm.get_rev()
    scm.set_ref(EXEC_HEAD, head_rev)

    # Modify a tracked file and stash it so there is a merge ref to set.
    tmp_dir.gen("foo", "stashed")
    scm.gitpython.git.stash()
    stash_rev = scm.resolve_rev("stash@{0}")
    scm.set_ref(EXEC_MERGE, stash_rev)

    root_url = URLInfo(str(cloud)) / SSHExecutor.gen_dirname()

    executor_kwargs = {
        "root_dir": root_url.path,
        "host": root_url.host,
        "port": root_url.port,
        "username": TEST_SSH_USER,
        "fs_factory": partial(_ssh_factory, cloud),
    }
    executor = SSHExecutor(scm, ".", **executor_kwargs)
    assert root_url.path == executor._repo_abspath

    fs = cloud._ssh
    for parts in (("foo",), ("dir",), ("dir", "bar")):
        assert fs.exists(posixpath.join(executor._repo_abspath, *parts))
Beispiel #5
0
def test_get_url_external(remote_url, erepo_dir):
    """Check ``api.get_url`` for ``foo`` in an external repo."""
    _set_remote_url_and_commit(erepo_dir.dvc, remote_url)

    object_url = URLInfo(remote_url) / "ac/bd18db4cc2f85cedef654fccc4a4d8"

    # Passing a file:// URL forces the external repo to be cloned into a
    # temporary directory.
    external_repo = "file://{}".format(erepo_dir)
    actual = api.get_url("foo", repo=external_repo)
    assert actual == object_url
Beispiel #6
0
def test_exists(mocker):
    """Check how ``HTTPFileSystem.exists`` maps response status codes.

    With ``request`` patched to return a canned response, 200 must yield
    ``True``, 404 must yield ``False`` and 403 must raise ``HTTPError``.
    """
    import io

    import requests

    from dvc.path_info import URLInfo

    response = requests.Response()
    # `raw` has to be set because `exists()` falls back to a streaming GET
    # request when the HEAD request fails.
    response.raw = io.StringIO("foo")

    fs = HTTPFileSystem(None, {})
    mocker.patch.object(fs, "request", return_value=response)

    target = URLInfo("https://example.org/file.txt")

    for status, expected in ((200, True), (404, False)):
        response.status_code = status
        assert fs.exists(target) is expected

    response.status_code = 403
    with pytest.raises(HTTPError):
        fs.exists(target)
Beispiel #7
0
def test_get_url_granular(tmp_dir, dvc, s3):
    """Check ``api.get_url`` for a tracked directory and files inside it."""
    tmp_dir.add_remote(config=s3.config)
    tmp_dir.dvc_gen(
        {"dir": {"foo": "foo", "bar": "bar", "nested": {"file": "file"}}}
    )

    # (path passed to the API, expected location relative to the remote)
    cases = (
        ("dir", "5f/c28ea78987408341668eba6525ebd1.dir"),
        ("dir/foo", "ac/bd18db4cc2f85cedef654fccc4a4d8"),
        ("dir/bar", "37/b51d194a7513e45b56f6524f2d51f2"),
        (
            os.path.join("dir", "nested", "file"),
            "8c/7dd922ad47494fc02c388e12c00eac",
        ),
    )
    for target, object_path in cases:
        object_url = URLInfo(s3.url) / object_path
        assert api.get_url(target) == object_url
Beispiel #8
0
def test_get_url(repo_dir, dvc_repo, remote):
    """Check ``api.get_url`` for a file added with a default remote set."""
    base_url = remote.get_url()

    # "-d" registers "upstream" as the default remote.
    run_dvc("remote", "add", "-d", "upstream", base_url)
    dvc_repo.add(repo_dir.FOO)

    object_url = URLInfo(base_url) / "ac/bd18db4cc2f85cedef654fccc4a4d8"
    actual = api.get_url(repo_dir.FOO)
    assert actual == object_url
Beispiel #9
0
def test_get_url_external(tmp_dir, erepo_dir, cloud):
    """Check ``api.get_url`` for ``foo`` committed in an external repo."""
    erepo_dir.add_remote(config=cloud.config)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "foo", commit="add foo")

    object_url = URLInfo(cloud.url) / "ac/bd18db4cc2f85cedef654fccc4a4d8"

    # Passing a file:// URL forces the external repo to be cloned into a
    # temporary directory.
    actual = api.get_url("foo", repo=f"file://{erepo_dir}")
    assert actual == object_url
Beispiel #10
0
def test_get_url_external(erepo_dir, remote_url, setup_remote):
    """Check ``api.get_url`` for ``foo`` committed in an external repo."""
    setup_remote(erepo_dir.dvc, url=remote_url)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "foo", commit="add foo")

    object_url = URLInfo(remote_url) / "ac/bd18db4cc2f85cedef654fccc4a4d8"

    # Passing a file:// URL forces the external repo to be cloned into a
    # temporary directory.
    actual = api.get_url("foo", repo=f"file://{erepo_dir}")
    assert actual == object_url
Beispiel #11
0
def test_download_fails_on_error_code(dvc):
    """Downloading a missing file through ``HTTPRemote`` raises ``HTTPError``."""
    with StaticFileServer() as server:
        base_url = "http://localhost:{}/".format(server.server_port)
        http_remote = HTTPRemote(dvc, {"url": base_url})

        with pytest.raises(HTTPError):
            http_remote._download(
                URLInfo(base_url) / "missing.txt", "missing.txt"
            )
Beispiel #12
0
def test_download_fails_on_error_code(dvc):
    """Downloading a missing file through ``HTTPRemoteTree`` raises ``HTTPError``."""
    with StaticFileServer() as server:
        base_url = f"http://localhost:{server.server_port}/"
        http_tree = HTTPRemoteTree(dvc, {"url": base_url})

        with pytest.raises(HTTPError):
            http_tree._download(
                URLInfo(base_url) / "missing.txt", "missing.txt"
            )
Beispiel #13
0
def test_content_length(mocker, headers, expected_size):
    """Check the size ``HTTPFileSystem`` derives from response headers.

    ``request`` is patched to return a 200 response carrying ``headers``.
    Both ``info()`` and ``_content_length()`` must report ``expected_size``.
    """
    response = requests.Response()
    response.status_code = 200
    response.headers.update(headers)

    fs = HTTPFileSystem()
    mocker.patch.object(fs, "request", return_value=response)

    target = URLInfo("https://example.org/file.txt")
    expected_info = {"etag": None, "size": expected_size}

    assert fs.info(target) == expected_info
    assert fs._content_length(response) == expected_size
Beispiel #14
0
def test_ssh_dir_out(tmp_dir, dvc, ssh_server):
    """Run and reproduce a stage whose output directory is on an SSH remote.

    The stage command is a small Python script that creates ``dir-out``
    with two files under the path taken from the remote URL.
    """
    tmp_dir.gen({"foo": "foo content"})

    # Connection details of the test SSH server.
    creds = ssh_server.test_creds
    user = creds["username"]
    port = ssh_server.port
    keyfile = creds["key_filename"]

    # Register the SSH remote, then an SSH-backed cache, each with its
    # key file.
    remote_url = SSHMocked.get_url(user, port)
    assert main(["remote", "add", "upstream", remote_url]) == 0
    assert main(["remote", "modify", "upstream", "keyfile", keyfile]) == 0

    cache_url = SSHMocked.get_url(user, port)
    assert main(["remote", "add", "sshcache", cache_url]) == 0
    assert main(["config", "cache.ssh", "sshcache"]) == 0
    assert main(["remote", "modify", "sshcache", "keyfile", keyfile]) == 0

    # A fresh repo object is needed so the config written above is reread.
    repo = DvcRepo(dvc.root_dir)

    # Restrict key permissions to avoid ssh's
    # "WARNING: UNPROTECTED PRIVATE KEY FILE".
    os.chmod(keyfile, 0o600)

    script_path = tmp_dir / "script.py"
    script_source = (
        "import sys, pathlib\n"
        "path = pathlib.Path(sys.argv[1])\n"
        "dir_out = path / 'dir-out'\n"
        "dir_out.mkdir()\n"
        "(dir_out / '1.txt').write_text('1')\n"
        "(dir_out / '2.txt').write_text('2')\n"
    )
    script_path.write_text(script_source)

    remote = URLInfo(remote_url)
    stage_cmd = "python {} {}".format(tmp_dir / "script.py", remote.path)

    # The fake dependency keeps the stage from being treated as a callback.
    repo.run(
        cmd=stage_cmd,
        outs=["remote://upstream/dir-out"],
        deps=["foo"],
    )

    repo.reproduce("dir-out.dvc")
    repo.reproduce("dir-out.dvc", force=True)
Beispiel #15
0
def test_init_cache(tmp_dir, dvc, scm, cloud):
    """Check that ``SSHExecutor.init_cache`` puts a tracked file in the cache.

    After ``init_cache`` runs, the object for ``foo`` must exist under
    ``.dvc/cache/<first two hash chars>/<rest>`` in the executor's repo
    directory on the SSH filesystem.
    """
    stages = tmp_dir.dvc_gen("foo", "foo", commit="init")
    foo_out = stages[0].outs[0]

    # Both executor refs point at the same commit in this test.
    rev = scm.get_rev()
    scm.set_ref(EXEC_HEAD, rev)
    scm.set_ref(EXEC_MERGE, rev)

    root_url = URLInfo(str(cloud)) / SSHExecutor.gen_dirname()

    executor_kwargs = {
        "root_dir": root_url.path,
        "host": root_url.host,
        "port": root_url.port,
        "username": TEST_SSH_USER,
        "fs_factory": partial(_ssh_factory, cloud),
    }
    executor = SSHExecutor(scm, ".", **executor_kwargs)
    executor.init_cache(dvc, rev)

    fs = cloud._ssh
    foo_hash = foo_out.hash_info.value
    cached_object = posixpath.join(
        executor._repo_abspath, ".dvc", "cache", foo_hash[:2], foo_hash[2:]
    )
    assert fs.exists(cached_object)
Beispiel #16
0
def test_get_url(tmp_dir, dvc, remote):
    """Check ``api.get_url`` for a tracked file ``foo`` with content "foo"."""
    tmp_dir.dvc_gen("foo", "foo")

    object_url = URLInfo(remote.url) / "ac/bd18db4cc2f85cedef654fccc4a4d8"
    actual = api.get_url("foo")
    assert actual == object_url
Beispiel #17
0
def test_get_url(remote_url, tmp_dir, dvc, repo_template):
    """Check ``api.get_url`` for ``foo`` once a default remote is set."""
    # "-d" registers "upstream" as the default remote.
    run_dvc("remote", "add", "-d", "upstream", remote_url)
    dvc.add("foo")

    object_url = URLInfo(remote_url) / "ac/bd18db4cc2f85cedef654fccc4a4d8"
    actual = api.get_url("foo")
    assert actual == object_url
Beispiel #18
0
def test_get_url(tmp_dir, dvc, remote_url):
    """Check ``api.get_url`` for a tracked file ``foo`` with content "foo"."""
    # "-d" registers "upstream" as the default remote.
    run_dvc("remote", "add", "-d", "upstream", remote_url)
    tmp_dir.dvc_gen("foo", "foo")

    object_url = URLInfo(remote_url) / "ac/bd18db4cc2f85cedef654fccc4a4d8"
    actual = api.get_url("foo")
    assert actual == object_url