Example #1
0
 def reflink(self, from_info, to_info):
     """Reflink ``from_info`` to ``to_info`` through a temporary sibling.

     The reflink is first created next to the destination under a name
     produced by ``tmp_fname``, given ``self._file_mode`` permissions and
     only then renamed onto ``to_info``.

     If changing the mode or renaming fails, the temporary file is removed
     on a best-effort basis and the original exception is re-raised.
     """
     tmp_info = to_info.parent / tmp_fname(to_info.name)
     System.reflink(from_info, tmp_info)

     tmp_path = fspath_py35(tmp_info)
     try:
         # NOTE: reflink has its own separate inode, so you can set
         # permissions that are different from the source.
         os.chmod(tmp_path, self._file_mode)
         os.rename(tmp_path, fspath_py35(to_info))
     except Exception:
         # Do not leave a stray temporary file next to the destination.
         # Cleanup is best effort: a failure here must not mask the error
         # that brought us into this handler.
         try:
             os.unlink(tmp_path)
         except OSError:
             pass
         raise
Example #2
0
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy file with progress bar.

    A reflink through ``System.reflink`` is tried first. When that raises
    ``DvcException`` the content is copied in ``LOCAL_CHUNK_SIZE`` pieces
    while a ``Tqdm`` bar is advanced.

    Args:
        src: source file path (converted with ``fspath_py35``).
        dest: destination path; if it is an existing directory the file is
            written inside it under the basename of ``src``.
        no_progress_bar: passed to ``Tqdm`` as ``disable``.
        name: description for the bar; falls back to the basename of
            ``dest`` as given by the caller.
    """
    from dvc.system import System
    from dvc.progress import Tqdm
    from dvc.exceptions import DvcException

    src = fspath_py35(src)
    dest = fspath_py35(dest)

    name = name or os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        # Reflink is not available: fall back to a plain chunked copy.
        progress = Tqdm(
            desc=name, disable=no_progress_bar, total=total, bytes=True
        )
        with progress as pbar:
            with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
                # An empty read marks the end of the source file.
                for buf in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
                    fdest.write(buf)
                    pbar.update(len(buf))
Example #3
0
def erepo_dir(tmp_path_factory, monkeypatch):
    """Build a temporary git + DVC repo with "master" and "branch" revisions.

    The repo is generated from ``REPO_TEMPLATE``, gets a default remote
    called "upstream" that points at its own local cache dir, and carries a
    "version" file whose content differs between the two branches.

    Returns:
        The ``TmpDir`` of the repo, with ``dvc`` and ``scm`` attributes set.
    """
    from dvc.remote.config import RemoteConfig
    from dvc.repo import Repo

    erepo = TmpDir(fspath_py35(tmp_path_factory.mktemp("erepo")))
    tracked_files = [".gitignore", "version.dvc"]

    # Chdir for git and dvc to work locally
    with monkeypatch.context() as patcher:
        patcher.chdir(fspath_py35(erepo))

        _git_init()
        erepo.dvc = Repo.init()
        erepo.scm = erepo.dvc.scm
        erepo.dvc_gen(REPO_TEMPLATE, commit="init repo")

        remote_config = RemoteConfig(erepo.dvc.config)
        remote_config.add(
            "upstream", erepo.dvc.cache.local.cache_dir, default=True
        )
        erepo.scm_add([erepo.dvc.config.config_file], commit="add remote")

        # "version" as seen on master
        erepo.dvc_gen("version", "master")
        erepo.scm_add(list(tracked_files), commit="master")

        # "version" as seen on the new branch
        erepo.scm.checkout("branch", create_new=True)
        (erepo / "version").unlink()  # For mac ???
        erepo.dvc_gen("version", "branch")
        erepo.scm_add(list(tracked_files), commit="branch")

        erepo.scm.checkout("master")
        erepo.dvc.close()

    return erepo
Example #4
0
def test_update_rev(tmp_dir, dvc, scm, git_dir):
    """`dvc.update(..., rev=...)` re-targets an import to another branch.

    Imports "foo" from a git repo, commits different contents on two new
    branches, then checks that updating to each branch records the branch
    name and its head in ``def_repo`` and rewrites the workspace file.
    """
    with git_dir.chdir():
        git_dir.scm_gen({"foo": "foo"}, commit="first")

    dvc.imp(fspath(git_dir), "foo")
    assert (tmp_dir / "foo.dvc").exists()

    # (branch name, contents of "foo" on that branch)
    branches = (("branch1", "foobar"), ("branch2", "foobar foo"))

    heads = {}
    for branch, contents in branches:
        with git_dir.chdir(), git_dir.branch(branch, new=True):
            git_dir.scm_gen({"foo": contents}, commit=branch + " commit")
            heads[branch] = git_dir.scm.get_rev()

    for branch, contents in branches:
        stage = dvc.update("foo.dvc", rev=branch)
        assert stage.deps[0].def_repo == {
            "url": fspath(git_dir),
            "rev": branch,
            "rev_lock": heads[branch],
        }
        with open(fspath_py35(tmp_dir / "foo")) as f:
            assert contents == f.read()
Example #5
0
 def copy(self, from_info, to_info):
     """Copy ``from_info`` to ``to_info`` through a temporary sibling file.

     The data is copied to a name produced by ``tmp_fname`` next to the
     destination and then renamed into place. On any error the temporary
     path is removed with ``self.remove`` and the error is re-raised.
     """
     staging_info = to_info.parent / tmp_fname(to_info.name)
     try:
         System.copy(from_info, staging_info)
         staging_path = fspath_py35(staging_info)
         final_path = fspath_py35(to_info)
         os.rename(staging_path, final_path)
     except Exception:
         self.remove(staging_info)
         raise
Example #6
0
def _read_params(repo, configs, rev):
    """Load every existing params file in ``configs`` from ``repo.tree``.

    Files that do not exist in the tree are skipped. Files that fail to
    parse as YAML are skipped too, with a debug log entry mentioning ``rev``.

    Returns:
        dict mapping ``str(config)`` to the parsed YAML content.
    """
    loaded = {}
    for config in configs:
        config_path = fspath_py35(config)
        if repo.tree.exists(config_path):
            with repo.tree.open(config_path, "r") as stream:
                try:
                    parsed = yaml.safe_load(stream)
                except yaml.YAMLError:
                    logger.debug(
                        "failed to read '%s' on '%s'",
                        config,
                        rev,
                        exc_info=True,
                    )
                else:
                    loaded[str(config)] = parsed

    return loaded
Example #7
0
    def is_protected(self, path_info):
        """Tell whether ``path_info`` exists with exactly ``CACHE_MODE`` bits.

        Returns ``False`` when ``self.exists(path_info)`` is false; otherwise
        compares the permission bits of the path with ``self.CACHE_MODE``.
        """
        if self.exists(path_info):
            st_mode = os.stat(fspath_py35(path_info)).st_mode
            permissions = stat.S_IMODE(st_mode)
            return permissions == self.CACHE_MODE

        return False
Example #8
0
    def get(self, path_info):
        """Look up the stored checksum for a local path.

        The record is found by the inode of the path and is used only when
        its recorded mtime and size still agree with the file, as judged by
        ``self._file_metadata_changed``. On a hit the record timestamp is
        refreshed.

        Args:
            path_info: local path info (its ``scheme`` must be "local").

        Returns:
            str or None: the stored checksum, or None when the path does not
            exist, has no record, or its metadata has changed.
        """
        assert path_info.scheme == "local"
        local_path = fspath_py35(path_info)

        if not os.path.exists(local_path):
            return None

        current_mtime, current_size = get_mtime_and_size(
            local_path, self.repo.tree
        )
        inode = get_inode(local_path)

        record = self.get_state_record_for_inode(inode)
        if not record:
            return None

        stored_mtime, stored_size, checksum, _ = record
        changed = self._file_metadata_changed(
            current_mtime, stored_mtime, current_size, stored_size
        )
        if changed:
            return None

        self._update_state_record_timestamp_for_inode(inode)
        return checksum
Example #9
0
 def chdir(self):
     """Switch the working directory to ``self`` around a single ``yield``.

     The directory that was current on entry is restored afterwards, even
     if the code running at the ``yield`` raises.

     NOTE(review): presumably wrapped by a context-manager decorator that
     sits above this definition - confirm.
     """
     previous_cwd = os.getcwd()
     try:
         target = fspath_py35(self)
         os.chdir(target)
         yield
     finally:
         os.chdir(previous_cwd)
Example #10
0
    def save(self, path_info, checksum):
        """Store ``checksum`` for a local path in the state records.

        A new record is inserted when the inode of the path has none yet;
        otherwise the existing record is updated.

        Args:
            path_info: local path info (its ``scheme`` must be "local");
                the path has to exist.
            checksum (str): checksum to save; must not be None.
        """
        assert path_info.scheme == "local"
        assert checksum is not None
        assert os.path.exists(fspath_py35(path_info))

        mtime, size = get_mtime_and_size(path_info, self.repo.tree)
        inode = get_inode(path_info)

        if self.get_state_record_for_inode(inode):
            self._update_state_for_path_changed(inode, mtime, size, checksum)
        else:
            self._insert_new_state_record(inode, mtime, size, checksum)
Example #11
0
def test_ignore_external(tmp_dir, scm, dvc, tmp_path_factory):
    """Files in an external dir are all walked despite ``.dvcignore`` rules.

    The workspace ``.dvcignore`` lists "*.backup" and "tmp"; both matching
    files in a directory created outside ``tmp_dir`` must still be yielded
    by ``RemoteLOCAL.walk_files``.
    """
    tmp_dir.gen(".dvcignore", "*.backup\ntmp")

    external = TmpDir(fspath_py35(tmp_path_factory.mktemp("external_dir")))
    external.gen({"y.backup": "y", "tmp": "ext tmp"})

    walked = RemoteLOCAL(dvc, {}).walk_files(external)
    found = set(relpath(entry, external) for entry in walked)

    assert found == {"y.backup", "tmp"}
Example #12
0
    def _upload(
        self, from_file, to_info, name=None, no_progress_bar=False, **_kwargs
    ):
        """Copy ``from_file`` to ``to_info`` via a temporary file.

        The parent directory of the destination is created if needed. The
        data is written to a temporary name derived from ``to_info`` with
        ``tmp_fname`` and then renamed onto the destination. Extra keyword
        arguments are accepted and ignored.
        """
        makedirs(to_info.parent, exist_ok=True)

        staging = tmp_fname(to_info)
        copyfile(from_file, staging, no_progress_bar=no_progress_bar, name=name)

        destination = fspath_py35(to_info)
        os.rename(staging, destination)
Example #13
0
def test_pipeline_file_target_ops(tmp_dir, dvc, local_remote, run_copy):
    """Push and pull accept "dvc.yaml" and "dvc.yaml:<stage>" as targets."""

    def remote_file_count():
        # Number of files currently stored in the local remote.
        return len(recurse_list_dir(fspath_py35(local_remote)))

    def wipe_remote():
        clean(local_remote.iterdir())

    tmp_dir.dvc_gen("foo", "foo")
    run_copy("foo", "bar", single_stage=True)

    tmp_dir.dvc_gen("lorem", "lorem")
    run_copy("lorem", "lorem2", name="copy-lorem-lorem2")

    tmp_dir.dvc_gen("ipsum", "ipsum")
    run_copy("ipsum", "baz", name="copy-ipsum-baz")

    outs = ["foo", "bar", "lorem", "ipsum", "baz", "lorem2"]

    remove(dvc.stage_cache.cache_dir)

    dvc.push()
    # each one's a copy of other, hence 3
    assert remote_file_count() == 3

    clean(outs, dvc)
    pulled = dvc.pull(["dvc.yaml"])
    assert set(pulled["added"]) == {"lorem2", "baz"}

    clean(outs, dvc)
    pulled = dvc.pull()
    assert set(pulled["added"]) == set(outs)

    # clean everything in remote and push
    wipe_remote()
    dvc.push(["dvc.yaml:copy-ipsum-baz"])
    assert remote_file_count() == 1

    wipe_remote()
    dvc.push(["dvc.yaml"])
    assert remote_file_count() == 2

    missing_stage = ["dvc.yaml:StageThatDoesNotExist"]

    with pytest.raises(StageNotFound):
        dvc.push(list(missing_stage))

    with pytest.raises(StageNotFound):
        dvc.pull(list(missing_stage))
Example #14
0
def _read_metrics(repo, metrics, rev):
    """Load the metrics files in ``metrics`` through a ``RepoTree``.

    Missing files are skipped. Files that fail to parse as YAML are skipped
    with a debug log entry mentioning ``rev``. A file is included only when
    ``_extract_metrics`` returns something truthy for its content.

    Returns:
        dict mapping ``str(metric)`` to the extracted metrics.
    """
    tree = RepoTree(repo)

    collected = {}
    for metric in metrics:
        metric_path = fspath_py35(metric)
        if not tree.exists(metric_path):
            continue

        with tree.open(metric_path, "r") as stream:
            try:
                # NOTE this also supports JSON
                raw = yaml.safe_load(stream)
            except yaml.YAMLError:
                logger.debug(
                    "failed to read '%s' on '%s'", metric, rev, exc_info=True
                )
            else:
                extracted = _extract_metrics(raw)
                if extracted:
                    collected[str(metric)] = extracted

    return collected
Example #15
0
def makedirs(path, exist_ok=False, mode=None):
    """Create ``path`` and any missing parents, optionally with ``mode``.

    Args:
        path: directory to create (converted with ``fspath_py35``).
        exist_ok: forwarded to ``os.makedirs``.
        mode: permission bits wanted for the created directories, or
            ``None`` to use the ``os.makedirs`` defaults under the current
            umask. Only the bits inside ``0o777`` take part in the
            temporary umask; any other bits are ignored by it.
    """
    path = fspath_py35(path)

    if mode is None:
        os.makedirs(path, exist_ok=exist_ok)
        return

    # utilize umask to set proper permissions since Python 3.7 the `mode`
    # `makedirs` argument no longer affects the file permission bits of
    # newly-created intermediate-level directories.
    #
    # The mask is built with bitwise operations rather than `0o777 - mode`:
    # the subtraction goes negative as soon as `mode` carries bits outside
    # 0o777 (setuid/setgid/sticky), while both forms agree for plain
    # permission modes.
    umask = os.umask(0o777 & ~mode)
    try:
        os.makedirs(path, exist_ok=exist_ok)
    finally:
        # Always put the previous process umask back.
        os.umask(umask)
Example #16
0
    def verify_metric(self):
        """Reject directories and binary files used as metrics.

        Does nothing when ``self.metric`` is falsy or when the path does not
        exist.

        Raises:
            DvcException: if the path is a directory, or is a file that
                ``istextfile`` does not accept.
        """
        if not self.metric:
            return

        path = fspath_py35(self.path_info)
        if not os.path.exists(path):
            return

        if os.path.isdir(path):
            template = "directory '{}' cannot be used as metrics."
        elif not istextfile(path):
            template = "binary file '{}' cannot be used as metrics."
        else:
            return

        raise DvcException(template.format(self.path_info))
Example #17
0
    def protect(path_info):
        """Make ``path_info`` read-only for user, group and others.

        If ``os.chmod`` fails with EPERM or EACCES, the error is tolerated
        as long as the file already carries all three read bits; every other
        failure is re-raised.
        """
        path = fspath_py35(path_info)
        read_only = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH

        try:
            os.chmod(path, read_only)
        except OSError as exc:
            # In share cache scenario, we might not own the cache file, so we
            # need to check if cache file is already protected.
            permission_denied = exc.errno in (errno.EPERM, errno.EACCES)
            if not permission_denied:
                raise

            current = os.stat(path).st_mode
            already_readable = (current & read_only) == read_only
            if not already_readable:
                raise
Example #18
0
    def _get_info(self):
        """Read the values of ``self.params`` from the parameters file.

        Returns:
            dict mapping every name in ``self.params`` to the value found
            under that dot-separated path in the YAML file, or an empty dict
            when ``self.exists`` is falsy.

        Raises:
            BadParamFileError: if the file cannot be parsed as YAML.
        """
        if not self.exists:
            return {}

        with open(fspath_py35(self.path_info), "r") as stream:
            try:
                config = yaml.safe_load(stream)
            except yaml.YAMLError as exc:
                message = "Unable to read parameters from '{}'".format(self)
                raise BadParamFileError(message) from exc

        return {
            param: dpath.util.get(config, param, separator=".")
            for param in self.params
        }
Example #19
0
    def _parse_path(self, remote, path):
        """Turn ``path`` into an absolute, normalized ``REMOTE.path_cls``.

        A "remote://" URL is resolved against ``remote.path_info``; any other
        path that is not absolute is taken relative to ``self.stage.wdir``.
        """
        url = urlparse(path)
        if url.scheme == "remote":
            candidate = remote.path_info / url.path.lstrip("/")
        else:
            # NOTE: we can path either from command line or .dvc file,
            # so we should expect both posix and windows style paths.
            # PathInfo accepts both, i.e. / works everywhere, \ only on win.
            #
            # FIXME: if we have Windows path containing / or posix one with \
            # then we have #2059 bug and can't really handle that.
            candidate = self.REMOTE.path_cls(path)
            if not candidate.is_absolute():
                candidate = self.stage.wdir / candidate

        normalized = os.path.normpath(fspath_py35(candidate))
        absolute = os.path.abspath(normalized)
        return self.REMOTE.path_cls(absolute)
Example #20
0
def test_isdir_isfile(tmp_dir, dvc):
    """``DvcTree`` reports only DVC-added paths as dirs or files.

    Before ``dvc.add`` neither path is a dir or a file for the tree; after
    adding them and deleting them from the workspace, the tree still
    reports "datadir" as a dir and "datafile" as a file.
    """
    tmp_dir.gen({"datafile": "data", "datadir": {"foo": "foo", "bar": "bar"}})

    tree = DvcTree(dvc)
    for not_added in ("datadir", "datafile"):
        assert not tree.isdir(not_added)
        assert not tree.isfile(not_added)

    dvc.add(["datadir", "datafile"])
    shutil.rmtree(fspath_py35(tmp_dir / "datadir"))
    (tmp_dir / "datafile").unlink()

    assert tree.isdir("datadir")
    assert not tree.isfile("datadir")
    assert not tree.isdir("datafile")
    assert tree.isfile("datafile")
Example #21
0
def file_md5(fname):
    """Get the (md5 hexdigest, md5 digest) of a file.

    Files that ``istextfile`` accepts are hashed after passing every chunk
    through ``dos2unix``; other files are hashed as they are. A progress bar
    is shown only for files of at least ``LARGE_FILE_SIZE`` bytes.

    Returns:
        tuple: ``(hexdigest, digest)``, or ``(None, None)`` when the path
        does not exist.
    """
    from dvc.istextfile import istextfile
    from dvc.progress import Tqdm

    fname = fspath_py35(fname)

    if not os.path.exists(fname):
        return (None, None)

    hash_md5 = hashlib.md5()
    binary = not istextfile(fname)
    size = os.path.getsize(fname)
    name = relpath(fname)

    is_large = size >= LARGE_FILE_SIZE
    if is_large:
        msg = "Computing md5 for a large file '{}'. This is only done once."
        logger.info(msg.format(name))

    progress = Tqdm(
        desc=name, disable=not is_large, total=size, bytes=True, leave=False
    )
    with progress as pbar:
        with open(fname, "rb") as fobj:
            # An empty read marks the end of the file.
            for data in iter(lambda: fobj.read(LOCAL_CHUNK_SIZE), b""):
                hash_md5.update(data if binary else dos2unix(data))
                # Progress follows the bytes read, not the bytes hashed.
                pbar.update(len(data))

    return (hash_md5.hexdigest(), hash_md5.digest())
Example #22
0
    def save_link(self, path_info):
        """Adds the specified path to the list of links created by dvc. This
        list is later used on `dvc checkout` to cleanup old links.

        Nothing is recorded when the path does not exist.

        Args:
            path_info: local path info to add to the list of links (its
                ``scheme`` must be "local").
        """
        assert path_info.scheme == "local"

        if not os.path.exists(fspath_py35(path_info)):
            return

        mtime = get_mtime_and_size(path_info, self.repo.tree)[0]
        inode = get_inode(path_info)
        relative_path = relpath(path_info, self.root_dir)

        statement = "REPLACE INTO {}(path, inode, mtime) VALUES (?, ?, ?)"
        values = (relative_path, self._to_sqlite(inode), mtime)
        self._execute(statement.format(self.LINK_STATE_TABLE), values)
Example #23
0
    def protect(self, path_info):
        """Set the permissions of ``path_info`` to ``self.CACHE_MODE``.

        A read-only file system error (EROFS) is ignored. EPERM and EACCES
        are tolerated only when the permission bits of the file already
        equal ``CACHE_MODE``; every other failure is re-raised.
        """
        path = fspath_py35(path_info)
        wanted = self.CACHE_MODE

        try:
            os.chmod(path, wanted)
        except OSError as exc:
            # There is nothing we need to do in case of a read-only file system
            if exc.errno == errno.EROFS:
                return

            # In shared cache scenario, we might not own the cache file, so we
            # need to check if cache file is already protected.
            permission_denied = exc.errno in (errno.EPERM, errno.EACCES)
            if not permission_denied:
                raise

            current = stat.S_IMODE(os.stat(path).st_mode)
            if current != wanted:
                raise
Example #24
0
def erepo_dir(tmp_path_factory, monkeypatch):
    """Build a temporary git + DVC repo with a default "upstream" remote.

    The remote points at the repo's own local cache dir, and the config
    change is committed.

    Returns:
        The ``TmpDir`` of the repo, with ``dvc`` and ``scm`` attributes set.
    """
    from dvc.remote.config import RemoteConfig
    from dvc.repo import Repo

    erepo = TmpDir(fspath_py35(tmp_path_factory.mktemp("erepo")))

    # Chdir for git and dvc to work locally
    with erepo.chdir():
        _git_init()
        erepo.dvc = Repo.init()
        erepo.scm = erepo.dvc.scm
        erepo.scm.commit("init dvc")

        remote_config = RemoteConfig(erepo.dvc.config)
        remote_config.add(
            "upstream", erepo.dvc.cache.local.cache_dir, default=True
        )
        erepo.scm_add([erepo.dvc.config.config_file], commit="add remote")

        erepo.dvc.close()

    return erepo
Example #25
0
    def _open_cached(self, out, remote=None, mode="r", encoding=None):
        """Open the cached content of ``out``.

        The file in the local cache is used when it exists. Otherwise the
        remote is asked to open the object itself; if the remote raises
        ``RemoteActionNotImplemented``, the object is pulled into the local
        cache and opened from there.

        Raises:
            ValueError: if ``out`` is a directory.
        """
        if out.isdir():
            raise ValueError("Can't open a dir")

        cache_info = self.cache.local.checksum_to_path_info(out.checksum)
        cache_file = fspath_py35(cache_info)
        open_kwargs = {"mode": mode, "encoding": encoding}

        if os.path.exists(cache_file):
            return open(cache_file, **open_kwargs)

        try:
            remote_obj = self.cloud.get_remote(remote)
            remote_info = remote_obj.checksum_to_path_info(out.checksum)
            return remote_obj.open(remote_info, **open_kwargs)
        except RemoteActionNotImplemented:
            # The remote cannot open objects directly: fetch, then open the
            # local copy.
            with self.state:
                used_cache = out.get_used_cache(remote=remote)
                self.cloud.pull(used_cache, remote=remote)

            return open(cache_file, **open_kwargs)
Example #26
0
 def isfile(path_info):
     """Return ``os.path.isfile`` for the file system path of ``path_info``."""
     local_path = fspath_py35(path_info)
     return os.path.isfile(local_path)
Example #27
0
 def exists(path_info):
     """Return whether the local ``path_info`` exists.

     Uses ``os.path.lexists``, so a broken symbolic link counts as existing.
     """
     assert path_info.scheme == "local"
     local_path = fspath_py35(path_info)
     return os.path.lexists(local_path)
Example #28
0
 def isdir(path_info):
     """Return ``os.path.isdir`` for the file system path of ``path_info``."""
     local_path = fspath_py35(path_info)
     return os.path.isdir(local_path)
Example #29
0
 def open(path_info, mode="r", encoding=None):
     """Open the file at ``path_info`` with the given mode and encoding.

     Both arguments are forwarded to the ``open`` call as keywords.
     """
     local_path = fspath_py35(path_info)
     return open(local_path, mode=mode, encoding=encoding)
Example #30
0
 def getsize(path_info):
     """Return ``os.path.getsize`` for the file system path of ``path_info``."""
     local_path = fspath_py35(path_info)
     return os.path.getsize(local_path)