Esempio n. 1
0
 def open_by_relpath(self, path, mode="r", encoding=None, **kwargs):
     """Yield an open file object for `path` resolved under `self.root_dir`.

     This is a generator that yields exactly once.
     NOTE(review): presumably wrapped with `contextlib.contextmanager`
     outside this block -- confirm.

     Args:
         path: location of the file, joined onto `self.root_dir`.
         mode: mode string handed to the builtin `open`.
         encoding: text encoding handed to the builtin `open`.
         **kwargs: accepted for interface compatibility; not used here.

     Raises:
         PathMissingError: if `FileNotFoundError` is raised inside the
             `try` (while opening the file, or thrown in at the `yield`).
             The original error is chained as the cause.
     """
     abs_path = os.path.join(self.root_dir, path)
     try:
         with open(abs_path, mode, encoding=encoding) as fd:
             yield fd
     except FileNotFoundError as exc:
         # Chain explicitly so the traceback shows a deliberate translation
         # rather than an error raised while handling another one.
         raise PathMissingError(path, self.url) from exc
Esempio n. 2
0
    def pull_to(self, path, to_info):
        """
        Pull the file or directory specified by `path` and check it out
        into `to_info`.

        It works with files tracked by Git and DVC, and also local files
        outside the repository.

        Args:
            path: path of the target, joined onto `self.root_dir`.
            to_info: destination; handed to `self._pull_cached` for cached
                outputs, otherwise converted with `fspath` for `fs_copy`.

        Raises:
            PathMissingError: if `path` is absolute and is not a cached
                output, or if `FileNotFoundError` is raised while pulling
                or copying. The original error is chained as the cause.
        """
        out = None
        path_info = PathInfo(self.root_dir) / path

        # A path that matches no output is not an error here: it may still
        # be a plain file that can be copied below.
        with suppress(OutputNotFoundError):
            (out, ) = self.find_outs_by_path(fspath(path_info), strict=False)

        try:
            if out and out.use_cache:
                self._pull_cached(out, path_info, to_info)
                return

            # Check if it is handled by Git (it can't have an absolute path)
            if os.path.isabs(path):
                raise FileNotFoundError

            fs_copy(fspath(path_info), fspath(to_info))
        except FileNotFoundError as exc:
            raise PathMissingError(path, self.url) from exc
Esempio n. 3
0
def external_repo(url, rev=None, for_write=False):
    """Yield a repo object for `url`, translating its errors for callers.

    The URL is cloned through `_cached_clone`. An `ExternalRepo` is tried
    first; `ExternalGitRepo` is used when that raises `NotDvcRepoError`.
    This is a generator that yields once.
    NOTE(review): presumably wrapped with `contextlib.contextmanager`
    outside this block -- confirm.

    Args:
        url: location of the repository to clone.
        rev: revision to use; "refs/remotes/origin/HEAD" when falsy.
        for_write: forwarded to `_cached_clone` and `ExternalRepo`; when
            true, the clone path is removed on exit.

    Raises:
        NoRemoteInExternalRepoError: on `NoRemoteError`.
        NoOutputInExternalRepoError: on an `OutputNotFoundError` whose
            `repo` is the yielded repo (others are re-raised unchanged).
        PathMissingError: on `FileMissingError`.
        Every translated error chains the original as its cause.
    """
    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    if not rev:
        # Local HEAD points to the tip of whatever branch we first cloned from
        # (which may not be the default branch), use origin/HEAD here to get
        # the tip of the default branch
        rev = "refs/remotes/origin/HEAD"
    try:
        repo = ExternalRepo(path, url, rev, for_write=for_write)
    except NotDvcRepoError:
        repo = ExternalGitRepo(path, url, rev)

    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        if exc.repo is repo:
            raise NoOutputInExternalRepoError(exc.output, repo.root_dir,
                                              url) from exc
        raise
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)
Esempio n. 4
0
    def fetch_external(self, paths: Iterable, **kwargs):
        """Fetch the given external repo paths into the cache.

        Each path is joined onto `self.root_dir`, checked with
        `self._check_repo` and fetched through `self._fetch_to_cache`,
        which receives `**kwargs` unchanged.

        Returns a 3-tuple
            (downloaded, failed, hash_infos)
        where `downloaded` is the sum of the values reported to the
        download callback, `failed` is always 0 in this method, and
        `hash_infos` holds one fetch result per path, usable as checkout
        targets for the fetched paths.

        Raises `PathMissingError` when looking up a path's metadata raises
        `FileNotFoundError`.
        """
        base = PathInfo(self.root_dir)
        targets = [base / rel for rel in paths]

        reported = []
        fetched = []
        for target in targets:
            missing = PathMissingError(target, self.url)
            with reraise(FileNotFoundError, missing):
                meta = self.repo_tree.metadata(target)

            self._check_repo(target, meta.repo)
            fetched.append(
                self._fetch_to_cache(target, meta.repo, reported.append,
                                     **kwargs))

        return sum(reported), 0, fetched
Esempio n. 5
0
File: ls.py Progetto: zeta1999/dvc
def ls(url, target=None, rev=None, recursive=None, outs_only=False):
    """Return the sorted, de-duplicated paths found under `target`.

    Entries come from `_ls_outs_repo` (only when the opened repo is a
    `Repo`) and, unless `outs_only` is set, from `_ls_files_repo`. Each
    path is shown relative to the target; the target itself is shown by
    its name.

    Raises `PathMissingError` when a `target` was given and nothing was
    found for it.
    """
    from dvc.external_repo import external_repo
    from dvc.repo import Repo
    from dvc.utils import relpath

    with external_repo(url, rev) as repo:
        base = _get_target_path_info(repo, target)

        found = []
        if isinstance(repo, Repo):
            found += _ls_outs_repo(repo, base, recursive)
        if not outs_only:
            found += _ls_files_repo(base, recursive)

        if target and not found:
            raise PathMissingError(target, repo, output_only=outs_only)

        # A set drops duplicates reported by both listings.
        names = sorted({
            item.name if item == base else relpath(item, base)
            for item in found
        })
    return names
Esempio n. 6
0
def external_repo(url, rev=None, for_write=False, **kwargs):
    """Yield an `ExternalRepo` for `url`, translating its errors.

    The URL is cloned through `_cached_clone`. For a writable repo the
    clone path is used as-is with no scm and no rev, and is removed on
    exit. Otherwise the real path is used, together with a `Git` object
    and the requested rev. Extra `**kwargs` go to `ExternalRepo`.

    Raises `NoRemoteInExternalRepoError`, `NoOutputInExternalRepoError`
    or `PathMissingError` in place of the corresponding lower-level
    errors, each chained to its cause.
    """
    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)

    if for_write:
        root_dir, scm, repo_rev = path, None, None
    else:
        root_dir = os.path.realpath(path)
        scm = Git(root_dir)
        # Local HEAD follows whichever branch was cloned first, which may
        # not be the default one; origin/HEAD is the default branch tip.
        repo_rev = rev or "refs/remotes/origin/HEAD"

    repo = ExternalRepo(**dict(
        root_dir=root_dir,
        url=url,
        scm=scm,
        rev=repo_rev,
        for_write=for_write,
        uninitialized=True,
        **kwargs,
    ))

    try:
        yield repo
    except NoRemoteError as err:
        raise NoRemoteInExternalRepoError(url) from err
    except OutputNotFoundError as err:
        if err.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(
            err.output, repo.root_dir, url
        ) from err
    except FileMissingError as err:
        raise PathMissingError(err.path, url) from err
    finally:
        repo.close()
        if for_write:
            _remove(path)
Esempio n. 7
0
    def fetch_external(self, paths: Iterable, **kwargs):
        """Save the given external repo paths into `self.local_cache`.

        Returns a 3-tuple
            (downloaded, failed, save_infos)
        where `downloaded` is the sum of the values reported to the
        download callback, `failed` is always 0 in this method, and
        `save_infos` holds one `local_cache.save` result per path, usable
        as checkout targets for the fetched paths.

        Raises `PathMissingError` for the first path that does not exist
        in `self.repo_tree`. `**kwargs` is accepted but not used here.
        """
        targets = [PathInfo(self.root_dir) / rel for rel in paths]

        reported = []
        saved = []
        for target in targets:
            if not self.repo_tree.exists(target):
                raise PathMissingError(target, self.url)
            saved.append(
                self.local_cache.save(
                    target,
                    None,
                    tree=self.repo_tree,
                    download_callback=reported.append,
                ))

        return sum(reported), 0, saved
Esempio n. 8
0
def external_repo(url,
                  rev=None,
                  for_write=False,
                  cache_dir=None,
                  cache_types=None,
                  **kwargs):
    """Yield a `Repo` for `url`, translating its errors.

    The URL is cloned through `_cached_clone`. The repo config starts
    from `_get_remote_config(url)` when `url` is a local directory and
    always receives a "cache" section built from `cache_dir` (or
    `_get_cache_dir(url)`) and `cache_types`. `subrepos` and
    `uninitialized` default to True unless given in `**kwargs`.

    Raises `NoRemoteInExternalRepoError`, `NoOutputInExternalRepoError`
    or `PathMissingError` in place of the corresponding lower-level
    errors, each chained to its cause. A writable clone is removed on exit.
    """
    from dvc.config import NoRemoteError
    from dvc.scm.git import Git

    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)

    cache_section = {
        "dir": cache_dir or _get_cache_dir(url),
        "type": cache_types,
    }
    cache_config = {"cache": cache_section}

    config = {}
    if os.path.isdir(url):
        config = _get_remote_config(url)
    config.update(cache_config)

    if for_write:
        root_dir, scm, repo_rev = path, None, None
    else:
        root_dir = os.path.realpath(path)
        scm = Git(root_dir)
        # Local HEAD follows whichever branch was cloned first, which may
        # not be the default one; origin/HEAD is the default branch tip.
        repo_rev = rev or "refs/remotes/origin/HEAD"

    repo_kwargs = dict(
        root_dir=root_dir,
        url=url,
        scm=scm,
        rev=repo_rev,
        config=config,
        repo_factory=erepo_factory(url, cache_config),
        **kwargs,
    )
    # Caller-supplied values win; these are only fallbacks.
    repo_kwargs.setdefault("subrepos", True)
    repo_kwargs.setdefault("uninitialized", True)

    repo = Repo(**repo_kwargs)

    try:
        yield repo
    except NoRemoteError as err:
        raise NoRemoteInExternalRepoError(url) from err
    except OutputNotFoundError as err:
        if err.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(
            err.output, repo.root_dir, url
        ) from err
    except FileMissingError as err:
        raise PathMissingError(err.path, url) from err
    finally:
        repo.close()
        if for_write:
            _remove(path)
Esempio n. 9
0
def ls(
    url,
    target=None,
    rev=None,
    recursive=None,
    outs_only=False,
):
    """List files and outputs of the repo at `url`.

    Args:
        url (str): the repo url
        target (str, optional): relative path into the repo
        rev (str, optional): SHA commit, branch or tag name
        recursive (bool, optional): recursively walk the repo
        outs_only (bool, optional): show only DVC-artifacts

    Returns:
        list of `entry`, sorted by "path"

    Raises:
        PathMissingError: a `target` was given and nothing was found

    Notes:
        `entry` is a dictionary with structure
        {
            "path": str,
            "isout": bool,
            "isdir": bool,
            "isexec": bool,
        }
    """
    from dvc.external_repo import external_repo
    from dvc.repo import Repo
    from dvc.utils import relpath

    with external_repo(url, rev) as repo:
        base = _get_target_path_info(repo, target)

        nodes = []
        if isinstance(repo, Repo):
            nodes += _ls_outs_repo(repo, base, recursive)
        if not outs_only:
            nodes += _ls_files_repo(base, recursive)

        if target and not nodes:
            raise PathMissingError(target, repo, output_only=outs_only)

        # One node per path_info; a later node replaces an earlier one.
        unique = {}
        for node in nodes:
            unique[node["path_info"]] = node

        entries = []
        for node in unique.values():
            path_info = node["path_info"]
            if path_info == base:
                shown = path_info.name
            else:
                shown = relpath(path_info, base)
            entries.append({
                "path": shown,
                "isout": node.get("isout", False),
                "isdir": node.get("isdir", False),
                "isexec": node.get("isexec", False),
            })
        entries.sort(key=lambda entry: entry["path"])
    return entries
Esempio n. 10
0
 def open_by_relpath(self, path, mode="r", encoding=None, **kwargs):
     """Opens a specified resource as a file object.

     This is a generator that yields the open file exactly once.
     NOTE(review): presumably wrapped with `contextlib.contextmanager`
     outside this block -- confirm.

     Args:
         path: location of the file, joined onto `self.root_dir`.
         mode: mode string handed to the builtin `open`.
         encoding: text encoding handed to the builtin `open`.
         **kwargs: accepted for interface compatibility; not used here.

     Raises:
         PathMissingError: if `FileNotFoundError` is raised inside the
             `try` (while opening the file, or thrown in at the `yield`).
             The original error is chained as the cause.
     """
     abs_path = os.path.join(self.root_dir, path)
     try:
         with open(abs_path, mode, encoding=encoding) as fd:
             yield fd
     except FileNotFoundError as exc:
         # Chain explicitly so the traceback shows a deliberate translation
         # rather than an error raised while handling another one.
         raise PathMissingError(path, self.url) from exc
Esempio n. 11
0
    def get_checksum(self, path):
        """Return the hash of `path` as computed by the tree of its repo.

        Raises `PathMissingError` when looking up the path's metadata
        raises `FileNotFoundError`.
        """
        target = PathInfo(self.root_dir) / path
        missing = PathMissingError(path, self.url)
        with reraise(FileNotFoundError, missing):
            meta = self.repo_tree.metadata(target)

        # Hash through the tree selected for the owning repo (per the
        # original note, this is meant to skip subrepos).
        return self._get_tree_for(meta.repo).get_hash(target)
Esempio n. 12
0
 def open_by_relpath(self, path, mode="r", encoding=None, **kwargs):
     """Opens a specified resource as a file object.

     The file is opened through a `RepoTree` built from this repo and
     yielded exactly once.
     NOTE(review): presumably wrapped with `contextlib.contextmanager`
     outside this block -- confirm.

     Args:
         path: path handed to `RepoTree.open`.
         mode: mode forwarded to `RepoTree.open`.
         encoding: encoding forwarded to `RepoTree.open`.
         **kwargs: extra keyword arguments forwarded to `RepoTree.open`.

     Raises:
         PathMissingError: if `FileNotFoundError` is raised inside the
             `try`. The original error is chained as the cause.
     """
     tree = RepoTree(self)
     try:
         with tree.open(path, mode=mode, encoding=encoding,
                        **kwargs) as fobj:
             yield fobj
     except FileNotFoundError as exc:
         raise PathMissingError(path, self.url) from exc
Esempio n. 13
0
    def pull_to(self, path, to_info):
        """Copy `path` from this repo's working tree to `to_info`.

        Args:
            path: path relative to `self.root_dir`; absolute paths are
                rejected.
            to_info: destination, converted with `fspath` for `fs_copy`.

        Raises:
            PathMissingError: if `path` is absolute or `fs_copy` raises
                `FileNotFoundError`. The original error is chained as the
                cause.
        """
        try:
            # Git handled files can't have absolute path
            if os.path.isabs(path):
                raise FileNotFoundError

            fs_copy(os.path.join(self.root_dir, path), fspath(to_info))
        except FileNotFoundError as exc:
            raise PathMissingError(path, self.url) from exc
Esempio n. 14
0
    def _get_used_and_obj(
        self,
        obj_only=False,
        **kwargs
    ) -> Tuple[Dict[Optional["ObjectDB"], Set["HashInfo"]], "HashFile"]:
        """Stage this dependency's path and collect the object ids it uses.

        Args:
            obj_only: when true, skip the `repo.used_objs` lookup and only
                stage the path.
            **kwargs: `locked` (default True) is popped and passed to
                `self._make_repo`; `jobs` is forwarded to `repo.used_objs`.

        Returns:
            A `(used_obj_ids, staged_obj)` tuple: a mapping from odb to a
            set of hash infos, and the object returned by `stage`.

        Raises:
            PathMissingError: if `stage` raises `FileNotFoundError`.

        Side effects:
            Stores the repo revision under `PARAM_REV_LOCK` in
            `self.def_repo` when `locked` is true and no value is set yet,
            and records the staged object in `self._staged_objs[rev]`.
        """
        from dvc.config import NoRemoteError
        from dvc.exceptions import NoOutputOrStageError, PathMissingError
        from dvc.objects.stage import stage
        from dvc.objects.tree import Tree

        local_odb = self.repo.odb.local
        locked = kwargs.pop("locked", True)
        with self._make_repo(locked=locked,
                             cache_dir=local_odb.cache_dir) as repo:
            used_obj_ids = defaultdict(set)
            rev = repo.get_rev()
            # Pin the revision the first time a locked dependency is resolved.
            if locked and self.def_repo.get(self.PARAM_REV_LOCK) is None:
                self.def_repo[self.PARAM_REV_LOCK] = rev

            path_info = PathInfo(repo.root_dir) / str(self.def_path)
            if not obj_only:
                try:
                    for odb, obj_ids in repo.used_objs(
                        [os.fspath(path_info)],
                            force=True,
                            jobs=kwargs.get("jobs"),
                            recursive=True,
                    ).items():
                        # A None key is replaced by the repo's remote odb,
                        # which is then marked read-only.
                        if odb is None:
                            odb = repo.cloud.get_remote_odb()
                            odb.read_only = True
                        self._check_circular_import(odb, obj_ids)
                        used_obj_ids[odb].update(obj_ids)
                except (NoRemoteError, NoOutputOrStageError):
                    # Best effort: a missing remote or an untracked path
                    # still falls through to staging below.
                    pass

            try:
                staging, staged_obj = stage(
                    local_odb,
                    path_info,
                    repo.repo_fs,
                    local_odb.fs.PARAM_CHECKSUM,
                )
            except FileNotFoundError as exc:
                raise PathMissingError(self.def_path,
                                       self.def_repo[self.PARAM_URL]) from exc
            # Work on a copy so that setting read_only does not touch the
            # staging object returned by `stage`.
            staging = copy(staging)
            staging.read_only = True

            self._staged_objs[rev] = staged_obj
            used_obj_ids[staging].add(staged_obj.hash_info)
            # For a Tree, every entry's hash info is recorded as well.
            if isinstance(staged_obj, Tree):
                used_obj_ids[staging].update(entry.hash_info
                                             for _, entry in staged_obj)
            return used_obj_ids, staged_obj
Esempio n. 15
0
File: ls.py Progetto: shizacat/dvc
def ls(
    url,
    path=None,
    rev=None,
    recursive=None,
    outs_only=False,
):
    """List files and outputs of the repo at `url`.

    Args:
        url (str): the repo url
        path (str, optional): relative path into the repo
        rev (str, optional): SHA commit, branch or tag name
        recursive (bool, optional): recursively walk the repo
        outs_only (bool, optional): show only DVC-artifacts

    Returns:
        list of `entry`, sorted by "path"

    Raises:
        PathMissingError: a `path` was given and nothing was found

    Notes:
        `entry` is a dictionary with structure
        {
            "path": str,
            "isout": bool,
            "isdir": bool,
            "isexec": bool,
        }
    """
    from dvc.external_repo import external_repo
    from dvc.repo import Repo

    with external_repo(url, rev) as repo:
        path_info = PathInfo(repo.root_dir)
        if path:
            path_info /= path

        # The `_ls` call with True runs only for a `Repo`; the call with
        # False is merged on top of it unless `outs_only` is set.
        if isinstance(repo, Repo):
            listing = _ls(repo, path_info, recursive, True)
        else:
            listing = {}
        if not outs_only:
            listing.update(_ls(repo, path_info, recursive, False))

        if path and not listing:
            raise PathMissingError(path, repo, output_only=outs_only)

        entries = []
        for name, entry in listing.items():
            entry["path"] = name
            entries.append(entry)
        return sorted(entries, key=lambda entry: entry["path"])
Esempio n. 16
0
 def open_by_relpath(self, path, mode="r", encoding=None, **kwargs):
     """Yield a file object for `path`, opened through `self.repo_tree`.

     `path` is joined onto `self.root_dir`; `mode`, `encoding` and any
     extra keyword arguments are forwarded to `repo_tree.open`. A
     `FileNotFoundError` raised inside the `try` becomes a chained
     `PathMissingError`.
     """
     target = PathInfo(self.root_dir) / path
     open_args = dict(kwargs, mode=mode, encoding=encoding)
     try:
         with self.repo_tree.open(target, **open_args) as handle:
             yield handle
     except FileNotFoundError as err:
         raise PathMissingError(path, self.url) from err
Esempio n. 17
0
    def download(self, to):
        """Materialize this dependency at the output `to`.

        If `self._copy_if_git_file(to.fspath)` returns a truthy value,
        nothing else is done. Otherwise the result of `self.fetch()` has
        its `info` copied onto `to`, which is then checked out.

        Args:
            to: destination output; its `fspath`, `info` and `checkout`
                members are used.

        Raises:
            PathMissingError: if any step raises `FileNotFoundError`. The
                original error is chained as the cause.
        """
        try:
            if self._copy_if_git_file(to.fspath):
                return

            out = self.fetch()
            to.info = copy.copy(out.info)
            to.checkout()
        except FileNotFoundError as exc:
            raise PathMissingError(self.def_path,
                                   self.def_repo[self.PARAM_URL]) from exc
Esempio n. 18
0
 def get_external(self, path, dest):
     """Place `path` from this repo at `dest`.

     With a local cache the path is fetched via `fetch_external` and
     checked out from the cache. Without one, the path is copied straight
     from `self.repo_tree`, raising `PathMissingError` if it does not
     exist there.
     """
     if not self.local_cache:
         # git-only erepo with no cache: copy files directly to dest
         source = PathInfo(self.root_dir) / path
         if not self.repo_tree.exists(source):
             raise PathMissingError(source, self.url)
         self.repo_tree.copytree(source, dest)
         return

     # fetch DVC and git files to tmpdir cache, then checkout
     _, _, fetched = self.fetch_external([path])
     self.local_cache.checkout(PathInfo(dest), fetched[0])
Esempio n. 19
0
def ls(
    url,
    path=None,
    rev=None,
    recursive=None,
    dvc_only=False,
):
    """List files and outputs of the repo at `url`.

    Args:
        url (str): the repo url
        path (str, optional): relative path into the repo
        rev (str, optional): SHA commit, branch or tag name
        recursive (bool, optional): recursively walk the repo
        dvc_only (bool, optional): show only DVC-artifacts

    Returns:
        list of `entry`, sorted by "path"

    Raises:
        PathMissingError: a `path` was given and nothing was found

    Notes:
        `entry` is a dictionary with structure
        {
            "path": str,
            "isout": bool,
            "isdir": bool,
            "isexec": bool,
        }
    """
    from dvc.external_repo import external_repo

    # Open with fetch=False / stream=True: directory listings are wanted,
    # file contents are not.
    with external_repo(url, rev, fetch=False, stream=True) as repo:
        path_info = PathInfo(repo.root_dir)
        if path:
            path_info /= path

        listing = _ls(repo.repo_tree, path_info, recursive, dvc_only)
        if path and not listing:
            raise PathMissingError(path, repo, dvc_only=dvc_only)

        entries = []
        for name, entry in listing.items():
            entry["path"] = name
            entries.append(entry)
        return sorted(entries, key=lambda entry: entry["path"])
Esempio n. 20
0
File: get.py Progetto: woodshop/dvc
def get(url, path, out=None, rev=None):
    """Copy `path` from the repository at `url` to the local path `out`.

    A DVC output with `use_cache` set is fetched through `_get_cached`.
    Anything else is copied with `fs_copy` from a clone of the repository.

    Args:
        url: location of the source repository.
        path: path inside the repository; must be relative unless it is a
            cached DVC output.
        out: destination; resolved together with `path` by
            `resolve_output`.
        rev: revision handed to `external_repo` and `cached_clone`.

    Raises:
        GetDVCFileError: if the resolved output is a valid stage filename.
        PathMissingError: if the path cannot be found; the triggering
            `OutputNotFoundError` or `FileNotFoundError` is chained as the
            cause.
    """
    out = resolve_output(path, out)

    if Stage.is_valid_filename(out):
        raise GetDVCFileError()

    # Creating a directory right beside the output to make sure that they
    # are on the same filesystem, so we could take the advantage of
    # reflink and/or hardlink. Not using tempfile.TemporaryDirectory
    # because it will create a symlink to tmpfs, which defeats the purpose
    # and won't work with reflink/hardlink.
    dpath = os.path.dirname(os.path.abspath(out))
    tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
    try:
        try:
            with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
                # Try any links possible to avoid data duplication.
                #
                # Not using symlink, because we need to remove cache after we
                # are done, and to make that work we would have to copy data
                # over anyway before removing the cache, so we might just copy
                # it right away.
                #
                # Also, we can't use theoretical "move" link type here, because
                # the same cache file might be used a few times in a directory.
                repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]
                output = repo.find_out_by_relpath(path)
                if output.use_cache:
                    _get_cached(repo, output, out)
                    return
                # Non-cached output, fall through and try to copy from git.
        except (NotDvcRepoError, NoOutputInExternalRepoError):
            # Not a DVC repository or, possibly, path is not tracked by DVC.
            # Fall through and try to copy from git.
            pass

        if os.path.isabs(path):
            raise FileNotFoundError

        repo_dir = cached_clone(url, rev=rev)

        fs_copy(os.path.join(repo_dir, path), out)
    except (OutputNotFoundError, FileNotFoundError) as exc:
        raise PathMissingError(path, url) from exc
    finally:
        # The temporary cache directory is removed on every exit path.
        remove(tmp_dir)
Esempio n. 21
0
def get(url, path, out=None, rev=None):
    """Copy `path` from the repository at `url` to the local path `out`.

    A DVC output with `use_cache` set is fetched through `_get_cached`.
    Anything else is copied with `fs_copy` from the repo's root directory.

    Args:
        url: location of the source repository.
        path: path inside the repository; must be relative unless it is a
            cached DVC output.
        out: destination; resolved together with `path` by
            `resolve_output`.
        rev: revision handed to `external_repo`.

    Raises:
        GetDVCFileError: if the resolved output is a valid stage filename.
        PathMissingError: if the path cannot be found.
        UrlNotDvcRepoError: on `NotDvcRepoError`.
        Both translated errors chain the original as their cause.
    """
    out = resolve_output(path, out)

    if Stage.is_valid_filename(out):
        raise GetDVCFileError()

    # Creating a directory right beside the output to make sure that they
    # are on the same filesystem, so we could take the advantage of
    # reflink and/or hardlink. Not using tempfile.TemporaryDirectory
    # because it will create a symlink to tmpfs, which defeats the purpose
    # and won't work with reflink/hardlink.
    dpath = os.path.dirname(os.path.abspath(out))
    tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
    try:
        with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
            # Try any links possible to avoid data duplication.
            #
            # Not using symlink, because we need to remove cache after we are
            # done, and to make that work we would have to copy data over
            # anyway before removing the cache, so we might just copy it
            # right away.
            #
            # Also, we can't use theoretical "move" link type here, because
            # the same cache file might be used a few times in a directory.
            repo.cache.local.cache_types = ["reflink", "hardlink", "copy"]

            try:
                output = repo.find_out_by_relpath(path)
            except OutputNotFoundError:
                output = None

            if output and output.use_cache:
                _get_cached(repo, output, out)
            else:
                # Either an uncached out with absolute path or a user error
                if os.path.isabs(path):
                    raise FileNotFoundError

                fs_copy(os.path.join(repo.root_dir, path), out)

    except (OutputNotFoundError, FileNotFoundError) as exc:
        raise PathMissingError(path, url) from exc
    except NotDvcRepoError as exc:
        raise UrlNotDvcRepoError(url) from exc
    finally:
        # The temporary cache directory is removed on every exit path.
        remove(tmp_dir)
Esempio n. 22
0
    def get_external(self, path, dest):
        """Place `path` from this repo at `dest`.

        When the path's metadata reports an existing output, it is fetched
        into the owning repo's local cache and checked out from there.
        Otherwise the files are copied directly through the tree selected
        for the owning repo.

        Raises `PathMissingError` when looking up the path's metadata
        raises `FileNotFoundError`.
        """
        source = PathInfo(self.root_dir) / path
        missing = PathMissingError(path, self.url)
        with reraise(FileNotFoundError, missing):
            meta = self.repo_tree.metadata(source)

        self._check_repo(source, meta.repo)

        if not meta.output_exists:
            # git-only folder: copy directly to dest (ignoring subrepos)
            plain_tree = self._get_tree_for(meta.repo)
            plain_tree.copytree(source, dest)
            return

        # fetch DVC and git files to tmpdir cache, then checkout
        owner = meta.repo
        local_cache = owner.cache.local
        fetched = self._fetch_to_cache(source, owner, None)
        local_cache.checkout(PathInfo(dest), fetched)
Esempio n. 23
0
    def pull_to(self, path, to_info):
        """Pull `path` from this repo into `to_info`.

        A cached DVC output is pulled through `self._pull_cached`; any
        other path is copied from the working tree with `fs_copy`.

        Args:
            path: path relative to `self.root_dir`; absolute paths are
                accepted only for cached outputs.
            to_info: destination for the pulled or copied data.

        Raises:
            PathMissingError: if `path` is absolute and is not a cached
                output, or if `FileNotFoundError` is raised while pulling
                or copying. The original error is chained as the cause.
        """
        try:
            out = None
            # No matching output is fine: fall back to a plain copy below.
            with suppress(OutputNotFoundError):
                out = self.find_out_by_relpath(path)

            if out and out.use_cache:
                self._pull_cached(out, to_info)
                return

            # Git handled files can't have absolute path
            if os.path.isabs(path):
                raise FileNotFoundError

            fs_copy(os.path.join(self.root_dir, path), fspath(to_info))
        except FileNotFoundError as exc:
            raise PathMissingError(path, self.url) from exc
Esempio n. 24
0
File: ls.py Progetto: pbielak/dvc
def ls(
    url,
    path=None,
    rev=None,
    recursive=None,
    dvc_only=False,
):
    """List files and outputs of the repo at `url`.

    Args:
        url (str): the repo url
        path (str, optional): relative path into the repo
        rev (str, optional): SHA commit, branch or tag name
        recursive (bool, optional): recursively walk the repo
        dvc_only (bool, optional): show only DVC-artifacts

    Returns:
        list of `entry`, sorted by "path"

    Raises:
        PathMissingError: a `path` was given and nothing was found

    Notes:
        `entry` is a dictionary with structure
        {
            "path": str,
            "isout": bool,
            "isdir": bool,
            "isexec": bool,
        }
    """
    from . import Repo

    with Repo.open(url, rev, subrepos=True, uninitialized=True) as repo:
        path_info = PathInfo(repo.root_dir)
        if path:
            path_info /= path

        listing = _ls(repo.repo_tree, path_info, recursive, dvc_only)
        if path and not listing:
            raise PathMissingError(path, repo, dvc_only=dvc_only)

        entries = []
        for name, entry in listing.items():
            entry["path"] = name
            entries.append(entry)
        return sorted(entries, key=lambda entry: entry["path"])
Esempio n. 25
0
File: repo.py Progetto: ush98/dvc
    def get_used_objs(self,
                      **kwargs) -> Dict[Optional["ObjectDB"], Set["HashFile"]]:
        """Collect the objects used by this dependency, grouped by odb.

        `locked` (default True) is popped from `kwargs` and passed to
        `self._make_repo`; `jobs` is forwarded to `repo.used_objs`. The
        dependency's path is also staged, and the staged object is
        recorded in `self._staged_objs[rev]` and added under a
        `GitObjectDB` key.

        Raises `PathMissingError` (chained) when staging raises
        `FileNotFoundError`.
        """
        from dvc.config import NoRemoteError
        from dvc.exceptions import NoOutputOrStageError, PathMissingError
        from dvc.objects.db.git import GitObjectDB
        from dvc.objects.stage import stage

        cache_odb = self.repo.odb.local
        locked = kwargs.pop("locked", True)
        repo_ctx = self._make_repo(locked=locked,
                                   cache_dir=cache_odb.cache_dir)
        with repo_ctx as repo:
            collected = defaultdict(set)
            rev = repo.get_rev()
            # Pin the revision the first time a locked dependency is resolved.
            if locked and self.def_repo.get(self.PARAM_REV_LOCK) is None:
                self.def_repo[self.PARAM_REV_LOCK] = rev

            target = PathInfo(repo.root_dir) / str(self.def_path)
            try:
                per_odb = repo.used_objs(
                    [os.fspath(target)],
                    force=True,
                    jobs=kwargs.get("jobs"),
                    recursive=True,
                )
                for odb, objs in per_odb.items():
                    if odb is None:
                        odb = repo.cloud.get_remote().odb
                    self._check_circular_import(odb)
                    collected[odb].update(objs)
            except (NoRemoteError, NoOutputOrStageError):
                # Best effort: staging below still runs.
                pass

            try:
                staged = stage(
                    cache_odb,
                    target,
                    repo.repo_fs,
                    cache_odb.fs.PARAM_CHECKSUM,
                )
            except FileNotFoundError as err:
                raise PathMissingError(
                    self.def_path, self.def_repo[self.PARAM_URL]
                ) from err

            self._staged_objs[rev] = staged
            collected[GitObjectDB(repo.repo_fs, repo.root_dir)].add(staged)
            return collected
Esempio n. 26
0
def external_repo(url, rev=None):
    """Yield a repo object for `url`, translating its errors for callers.

    The URL is cloned through `_cached_clone`. An `ExternalRepo` is tried
    first; `ExternalGitRepo` is used when that raises `NotDvcRepoError`.
    The repo is closed on exit. This is a generator that yields once.
    NOTE(review): presumably wrapped with `contextlib.contextmanager`
    outside this block -- confirm.

    Args:
        url: location of the repository to clone.
        rev: revision handed to `_cached_clone`.

    Raises:
        NoRemoteInExternalRepoError: on `NoRemoteError`.
        NoOutputInExternalRepoError: on an `OutputNotFoundError` whose
            `repo` is the yielded repo (others are re-raised unchanged).
        PathMissingError: on `FileMissingError`.
        Every translated error chains the original as its cause.
    """
    path = _cached_clone(url, rev)
    try:
        repo = ExternalRepo(path, url)
    except NotDvcRepoError:
        repo = ExternalGitRepo(path, url)

    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        if exc.repo is repo:
            raise NoOutputInExternalRepoError(exc.output, repo.root_dir,
                                              url) from exc
        raise
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
Esempio n. 27
0
def _ls(
    repo: "Repo", path: str, recursive: bool = None, dvc_only: bool = False
):
    """List the entries found at `path` through `repo.dvcfs`.

    Args:
        repo: repo whose `dvcfs` filesystem is walked.
        path: OS path to list, converted with `fs.from_os_path`.
        recursive: when truthy, walk the whole subtree and report files
            only; otherwise report the files and directories of the top
            level only.
        dvc_only: forwarded to `fs.walk`; when true, entries without
            `dvc_info["outs"]` are dropped from the result.

    Returns:
        dict mapping each entry's path (relative to the listed path and
        joined with `os.path.join`) to a dict with the keys "isout",
        "isdir" and "isexec". When the walk yields nothing and the path
        is a file, the single key is the file's basename.

    Raises:
        PathMissingError: if `fs.info` raises `FileNotFoundError` for the
            path. The original error is chained as the cause.
    """
    fs: "DvcFileSystem" = repo.dvcfs
    fs_path = fs.from_os_path(path)

    try:
        fs_path = fs.info(fs_path)["name"]
    except FileNotFoundError as exc:
        raise PathMissingError(path, repo, dvc_only=dvc_only) from exc

    infos = {}
    for root, dirs, files in fs.walk(
        fs_path, dvcfiles=True, dvc_only=dvc_only
    ):
        entries = chain(files, dirs) if not recursive else files

        for entry in entries:
            entry_fs_path = fs.path.join(root, entry)
            relparts = fs.path.relparts(entry_fs_path, fs_path)
            name = os.path.join(*relparts)
            infos[name] = fs.info(entry_fs_path)

        # Only the top level is wanted for a non-recursive listing.
        if not recursive:
            break

    # The walk reported nothing: the path may be a single file.
    if not infos and fs.isfile(fs_path):
        infos[os.path.basename(path)] = fs.info(fs_path)

    ret = {}
    for name, info in infos.items():
        dvc_info = info.get("dvc_info", {})
        if dvc_info.get("outs") or not dvc_only:
            ret[name] = {
                "isout": dvc_info.get("isout", False),
                "isdir": info["type"] == "directory",
                "isexec": info.get("isexec", False),
            }

    return ret
Esempio n. 28
0
def get_url(path, repo=None, rev=None, remote=None):
    """
    Return the URL of the storage location of a data file or directory
    tracked in a DVC repo. For Git repos, HEAD is used unless a rev
    argument is supplied. The default remote is tried unless a remote
    argument is supplied.

    Raises PathMissingError if the path cannot be found, and
    OutputNotFoundError if the file is not tracked by DVC.

    NOTE: This function does not check for the actual existence of the file or
    directory in the remote storage.
    """
    with Repo.open(repo, rev=rev, subrepos=True, uninitialized=True) as _repo:
        target = _repo.fs.path.join(_repo.root_dir, path)

        missing = PathMissingError(path, repo)
        with reraise(FileNotFoundError, missing):
            entry = _repo.repo_fs.info(target)

        if not entry["isdvc"]:
            raise OutputNotFoundError(path, repo)

        # The checksum is read from the repo that owns the entry.
        owner = entry["repo"]
        cloud = owner.cloud
        checksum = owner.dvcfs.info(target)["md5"]
        return cloud.get_url_for(remote, checksum=checksum)
Esempio n. 29
0
def get_url(path, repo=None, rev=None, remote=None):
    """
    Return the URL of the storage location of a data file or directory
    tracked in a DVC repo. For Git repos, HEAD is used unless a rev
    argument is supplied. The default remote is tried unless a remote
    argument is supplied.

    Raises PathMissingError if the path cannot be found, and
    OutputNotFoundError if the file is not a dvc-tracked file.

    NOTE: This function does not check for the actual existence of the file or
    directory in the remote storage.
    """
    with _make_repo(repo, rev=rev) as _repo:
        target = PathInfo(_repo.root_dir) / path

        missing = PathMissingError(path, repo)
        with reraise(FileNotFoundError, missing):
            meta = _repo.repo_tree.metadata(target)

        if not meta.is_dvc:
            raise OutputNotFoundError(path, repo)

        # The cloud comes from the repo that owns the path; the hash is
        # read through the top-level repo tree.
        cloud = meta.repo.cloud
        checksum = _repo.repo_tree.get_hash(target).value
        return cloud.get_url_for(remote, checksum=checksum)
Esempio n. 30
0
def external_repo(url, rev=None, for_write=False):
    """Yield a repo object for `url`, translating its errors for callers.

    The URL is cloned through `_cached_clone`. An `ExternalRepo` is tried
    first; `ExternalGitRepo` is used when that raises `NotDvcRepoError`.
    This is a generator that yields once.
    NOTE(review): presumably wrapped with `contextlib.contextmanager`
    outside this block -- confirm.

    Args:
        url: location of the repository to clone.
        rev: revision handed to `_cached_clone`.
        for_write: forwarded to `_cached_clone`; when true, the clone path
            is removed on exit.

    Raises:
        NoRemoteInExternalRepoError: on `NoRemoteError`.
        NoOutputInExternalRepoError: on an `OutputNotFoundError` whose
            `repo` is the yielded repo (others are re-raised unchanged).
        PathMissingError: on `FileMissingError`.
        Every translated error chains the original as its cause.
    """
    # logging interpolates lazily with %-style placeholders; "{}" here
    # would make the record fail to format when it is emitted.
    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    try:
        repo = ExternalRepo(path, url)
    except NotDvcRepoError:
        repo = ExternalGitRepo(path, url)

    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        if exc.repo is repo:
            raise NoOutputInExternalRepoError(exc.output, repo.root_dir,
                                              url) from exc
        raise
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)