Beispiel #1
0
def test_push_wildcard_from_bare_git_repo(tmp_dir, make_tmp_dir, erepo_dir,
                                          local_cloud):
    """Push only glob-matched targets, then import through a bare Git repo.

    ``tmp_dir`` is initialised as a bare Git repository and receives the
    ``master`` branch of ``erepo_dir``.  Only targets matching ``dire*`` are
    pushed, so importing ``dirextra`` is expected to succeed while importing
    ``dir123`` is expected to raise ``PathMissingError``.
    """
    Git.init(tmp_dir.fs_path, bare=True).close()

    erepo_dir.add_remote(config=local_cloud.config)
    tracked_dirs = {
        "dir123": {"foo": "foo content"},
        "dirextra": {"extrafoo": "extra foo content"},
    }
    with erepo_dir.chdir():
        erepo_dir.dvc_gen(tracked_dirs, commit="initial")

    # The pattern matches "dirextra" but not "dir123".
    wildcard = os.path.join(os.fspath(erepo_dir), "dire*")
    erepo_dir.dvc.push([wildcard], glob=True)

    bare_repo_url = os.fspath(tmp_dir)
    git_repo = erepo_dir.scm.gitpython.repo
    git_repo.create_remote("origin", bare_repo_url)
    git_repo.remote("origin").push("master")

    dvc_repo = make_tmp_dir("dvc-repo", scm=True, dvc=True)
    with dvc_repo.chdir():
        dvc_repo.dvc.imp(bare_repo_url, "dirextra")

        with pytest.raises(PathMissingError):
            dvc_repo.dvc.imp(bare_repo_url, "dir123")
Beispiel #2
0
def git(tmp_dir, scm, request):
    """Yield a ``Git`` instance restricted to the backend in ``request.param``.

    The chosen backend is also stored on the instance as ``test_backend``.
    The instance is closed after control returns from the ``yield``.
    """
    from scmrepo.git import Git

    backend = request.param
    scm_git = Git(os.fspath(tmp_dir), backends=[backend])
    scm_git.test_backend = backend
    yield scm_git
    scm_git.close()
Beispiel #3
0
def test_no_commits(tmp_dir):
    """``Git.no_commits`` is truthy before the first commit, falsy after."""
    from scmrepo.git import Git

    from tests.dir_helpers import git_init

    git_init(".")
    empty_before = Git().no_commits
    assert empty_before

    # Create a file, stage it and commit it, each through a fresh instance.
    tmp_dir.gen("foo", "foo")
    staged = ["foo"]
    Git().add(staged)
    Git().commit("foo")

    empty_after = Git().no_commits
    assert not empty_after
Beispiel #4
0
def test_import_from_bare_git_repo(tmp_dir, make_tmp_dir, erepo_dir,
                                   local_cloud):
    """Import a DVC-tracked file using a bare Git repo as the source URL.

    ``tmp_dir`` is initialised as a bare Git repository and receives the
    ``master`` branch of ``erepo_dir`` after its data has been pushed to the
    configured remote.  A fresh DVC repo then imports ``foo`` from it.
    """
    Git.init(tmp_dir.fs_path, bare=True).close()

    erepo_dir.add_remote(config=local_cloud.config)
    tracked_files = {"foo": "foo"}
    with erepo_dir.chdir():
        erepo_dir.dvc_gen(tracked_files, commit="initial")
    erepo_dir.dvc.push()

    bare_repo_url = os.fspath(tmp_dir)
    git_repo = erepo_dir.scm.gitpython.repo
    git_repo.create_remote("origin", bare_repo_url)
    git_repo.remote("origin").push("master")

    dvc_repo = make_tmp_dir("dvc-repo", scm=True, dvc=True)
    with dvc_repo.chdir():
        dvc_repo.dvc.imp(bare_repo_url, "foo")
Beispiel #5
0
def _clone_default_branch(url, rev, for_write=False):
    """Get or create a clean clone of ``url``.

    A clone already recorded in ``CLONES`` is reused.  It is refreshed unless
    ``rev`` is a SHA that the clone already has: a shallow clone gets
    unshallowed, a full clone gets pulled.  When no clone is recorded, a new
    one is created in a temporary directory; a shallow clone of ``rev`` is
    attempted first when ``rev`` is a tag or branch name and the clone is not
    requested for writing.

    Returns:
        tuple: ``(clone_path, shallow)``, which is also what gets stored in
        ``CLONES[url]``.
    """
    from scmrepo.git import Git

    clone_path, shallow = CLONES.get(url, (None, False))

    git = None
    try:
        if not clone_path:
            from dvc.scm import clone

            logger.debug("erepo: git clone '%s' to a temporary dir", url)
            clone_path = tempfile.mkdtemp("dvc-clone")
            try_shallow = not for_write and rev and not Git.is_sha(rev)
            if try_shallow:
                # A tag or branch name: attempt the cheaper shallow clone
                # first and silently fall back to a full clone on failure.
                try:
                    git = clone(url, clone_path, shallow_branch=rev)
                    shallow = True
                    logger.debug("erepo: using shallow clone for branch '%s'",
                                 rev)
                except CloneError:
                    pass
            if not git:
                git = clone(url, clone_path)
                shallow = False
            CLONES[url] = (clone_path, shallow)
        else:
            git = Git(clone_path)
            # Known SHAs never need a refresh; branches and tags might move.
            rev_is_known_sha = Git.is_sha(rev) and git.has_rev(rev)
            if not rev_is_known_sha:
                if not shallow:
                    logger.debug("erepo: git pull '%s'", url)
                    git.pull()
                else:
                    # A rev is missing from a shallow clone: fall back to a
                    # full (unshallowed) clone.  Fetching specific rev SHAs
                    # is only available in certain git versions, so when
                    # several specific revs of one URL are needed it is
                    # easier and safer to work with a full clone.
                    logger.debug("erepo: unshallowing clone for '%s'", url)
                    _unshallow(git)
                    shallow = False
                    CLONES[url] = (clone_path, shallow)
    finally:
        if git:
            git.close()

    return clone_path, shallow
Beispiel #6
0
def ls(repo, *args, rev=None, git_remote=None, all_=False, **kwargs):
    """Collect experiment ref names grouped by revision.

    Args:
        repo: object exposing the SCM as ``repo.scm``.
        rev: revision to list experiments for.  It is resolved through
            ``resolve_rev``; a resolution failure is tolerated only when
            ``git_remote`` is given and ``rev`` looks like a SHA.
        git_remote: when set, refs are read from this Git remote.
        all_: list every experiment ref, keyed by its baseline SHA.  Only
            takes effect when no ``rev`` is given.

    Returns:
        collections.defaultdict: revision -> list of experiment ref names.
    """
    from scmrepo.git import Git

    from dvc.scm import RevError, resolve_rev

    if not rev:
        if not all_:
            # Neither a rev nor "all" was requested: use the current revision.
            rev = repo.scm.get_rev()
    else:
        try:
            rev = resolve_rev(repo.scm, rev)
        except RevError:
            # This could be a remote rev that has not been fetched yet
            if not (git_remote and Git.is_sha(rev)):
                raise

    results = defaultdict(list)

    if rev:
        ref_infos = (
            remote_exp_refs_by_baseline(repo.scm, git_remote, rev)
            if git_remote
            else exp_refs_by_baseline(repo.scm, rev)
        )
        for ref_info in ref_infos:
            results[rev].append(ref_info.name)
    elif all_:
        ref_infos = (
            remote_exp_refs(repo.scm, git_remote)
            if git_remote
            else exp_refs(repo.scm)
        )
        for ref_info in ref_infos:
            results[ref_info.baseline_sha].append(ref_info.name)

    return results
Beispiel #7
0
    def make(name, *, scm=False, dvc=False, subdir=False):  # pylint: disable=W0621
        """Create a ``TmpDir`` populated from a cached, pre-initialised copy.

        A template directory is initialised once per ``(scm, dvc, subdir)``
        combination and remembered in ``CACHE``; each call copies its entries
        into the target directory.

        Args:
            name: a string (a new temp directory is created from it) or an
                already existing path to populate.
            scm: attach a Git instance as ``new_dir.scm``.
            dvc: attach a ``Repo`` as ``new_dir.dvc``.
            subdir: forwarded to ``TmpDir.init`` for the template.

        Returns:
            TmpDir: the new directory; its ``close`` is registered as a
            finalizer on ``request``.
        """
        from shutil import ignore_patterns

        from scmrepo.git import Git

        from dvc.repo import Repo

        from .tmp_dir import TmpDir

        cache_key = (scm, dvc, subdir)
        cache = CACHE.get(cache_key)
        if not cache:
            cache = tmp_path_factory.mktemp("dvc-test-cache" + worker_id)
            TmpDir(cache).init(scm=scm, dvc=dvc, subdir=subdir)
            CACHE[cache_key] = os.fspath(cache)

        if isinstance(name, str):
            path = tmp_path_factory.mktemp(name)
        else:
            path = name

        # ignore sqlite files from .dvc/tmp. We might not be closing the cache
        # connection resulting in PermissionErrors in Windows.
        ignore = ignore_patterns("cache.db*")
        # shutil.copytree's dirs_exist_ok is only available in >=3.8, hence
        # the entry-by-entry copy.
        for item in os.listdir(cache):
            source = os.path.join(cache, item)
            target = os.path.join(path, item)
            _fs_copy(source, target, ignore=ignore)

        new_dir = TmpDir(path)
        str_path = os.fspath(new_dir)
        if dvc:
            new_dir.dvc = Repo(str_path)
        if scm:
            # Reuse the SCM of the DVC repo when there is one.
            if hasattr(new_dir, "dvc"):
                new_dir.scm = new_dir.dvc.scm
            else:
                new_dir.scm = Git(str_path)
        request.addfinalizer(new_dir.close)
        return new_dir
Beispiel #8
0
def clone(url: str, to_path: str, **kwargs):
    """Clone ``url`` into ``to_path`` while showing a progress bar.

    Args:
        url: location of the repository to clone.
        to_path: destination directory of the clone.
        **kwargs: forwarded unchanged to ``Git.clone``.

    Returns:
        Whatever ``Git.clone`` returns for the new clone.

    Raises:
        CloneError: when scmrepo reports a clone failure.  The scmrepo
            exception is attached as ``__cause__``.
    """
    from scmrepo.exceptions import CloneError as InternalCloneError

    with TqdmGit(desc="Cloning") as pbar:
        try:
            return Git.clone(url, to_path, progress=pbar.update_git, **kwargs)
        except InternalCloneError as exc:
            # Chain explicitly so the traceback marks the scmrepo error as
            # the direct cause of the translated one.
            raise CloneError(str(exc)) from exc
Beispiel #9
0
def test_no_commits(tmp_dir):
    """``metrics.diff()`` returns ``{}`` in a Git repo without commits."""
    from scmrepo.git import Git

    from dvc.repo import Repo

    scm = Git.init(tmp_dir.fs_path)
    assert scm.no_commits

    metrics_diff = Repo.init().metrics.diff()
    assert metrics_diff == {}
Beispiel #10
0
def test_no_commits(tmp_dir):
    """``metrics.diff()`` returns ``{}`` in a Git repo without commits."""
    from scmrepo.git import Git

    from dvc.repo import Repo
    from tests.dir_helpers import git_init

    git_init(".")
    repo_is_empty = Git().no_commits
    assert repo_is_empty

    metrics_diff = Repo.init().metrics.diff()
    assert metrics_diff == {}
Beispiel #11
0
    def init(self, *, scm=False, dvc=False, subdir=False):
        """Initialise Git and/or DVC in this directory.

        Args:
            scm: initialise a Git repository and set ``self.scm``.
            dvc: initialise a DVC repo and set ``self.dvc``.
            subdir: forwarded to ``Repo.init``.

        Requesting a component that is already attached trips an assertion.
        When DVC was initialised and ``self.scm`` exists, a commit with the
        message "init dvc" is made.
        """
        from scmrepo.git import Git

        from dvc.repo import Repo

        assert not (scm and hasattr(self, "scm"))
        assert not (dvc and hasattr(self, "dvc"))

        if scm:
            Git.init(self.fs_path).close()

        if dvc:
            # DVC runs without SCM only if Git is neither requested now nor
            # already attached.
            without_scm = not (scm or hasattr(self, "scm"))
            self.dvc = Repo.init(
                self.fs_path,
                no_scm=without_scm,
                subdir=subdir,
            )

        if scm:
            # Prefer the SCM object owned by the DVC repo when there is one.
            if hasattr(self, "dvc"):
                self.scm = self.dvc.scm
            else:
                self.scm = Git(self.fs_path)

        if dvc and hasattr(self, "scm"):
            self.scm.commit("init dvc")
Beispiel #12
0
Datei: scm.py Projekt: jhhuh/dvc
def clone(url: str, to_path: str, **kwargs):
    """Clone ``url`` into ``to_path`` and fetch its experiment refs.

    Experiments are fetched from ``origin`` unless the caller passed a
    ``shallow_branch`` keyword.

    Args:
        url: location of the repository to clone.
        to_path: destination directory of the clone.
        **kwargs: forwarded unchanged to ``Git.clone``.

    Returns:
        The object returned by ``Git.clone``.

    Raises:
        CloneError: when scmrepo reports a clone failure.  The scmrepo
            exception is attached as ``__cause__``.
    """
    from scmrepo.exceptions import CloneError as InternalCloneError

    from dvc.repo.experiments.utils import fetch_all_exps

    with TqdmGit(desc="Cloning") as pbar:
        try:
            git = Git.clone(url, to_path, progress=pbar.update_git, **kwargs)
            if "shallow_branch" not in kwargs:
                fetch_all_exps(git, "origin", progress=pbar.update_git)
            return git
        except InternalCloneError as exc:
            # Chain explicitly so the traceback marks the scmrepo error as
            # the direct cause of the translated one.
            raise CloneError(str(exc)) from exc
Beispiel #13
0
def _git_checkout(repo_path, rev):
    """Check out ``rev`` in the Git repository located at ``repo_path``.

    The ``Git`` handle opened for the checkout is closed afterwards, whether
    or not the checkout raised.
    """
    from scmrepo.git import Git

    logger.debug("erepo: git checkout %s@%s", repo_path, rev)
    scm = Git(repo_path)
    try:
        scm.checkout(rev)
    finally:
        scm.close()
Beispiel #14
0
def SCM(root_dir, search_parent_directories=True, no_scm=False):  # pylint: disable=invalid-name
    """Build the SCM object for the repo located at ``root_dir``.

    Args:
        root_dir (str): path to a root directory of the repo.
        search_parent_directories (bool): whether to look for the repo root
            in parent directories.  Only passed on when ``Git`` is built.
        no_scm (bool): return ``NoSCM`` instead of ``Git`` if True.

    Returns:
        dvc.scm.base.Base: SCM instance.

    Construction happens inside ``map_scm_exception()``.
    """
    with map_scm_exception():
        if not no_scm:
            return Git(
                root_dir,
                search_parent_directories=search_parent_directories,
            )
        return NoSCM(root_dir, _raise_not_implemented_as=NoSCMError)
Beispiel #15
0
    def init(self, *, scm=False, dvc=False, subdir=False):
        """Initialise Git and/or DVC in this directory.

        Args:
            scm: run ``git_init`` on the directory and set ``self.scm``.
            dvc: initialise a DVC repo and set ``self.dvc``.
            subdir: forwarded to ``Repo.init``.

        Requesting a component that is already attached trips an assertion.
        When DVC was initialised and ``self.scm`` exists, a commit with the
        message "init dvc" is made.
        """
        from scmrepo.git import Git

        from dvc.repo import Repo

        assert not (scm and hasattr(self, "scm"))
        assert not (dvc and hasattr(self, "dvc"))

        root = os.fspath(self)

        if scm:
            git_init(root)

        if dvc:
            # DVC runs without SCM only if Git is neither requested now nor
            # already attached.
            without_scm = not (scm or hasattr(self, "scm"))
            self.dvc = Repo.init(root, no_scm=without_scm, subdir=subdir)

        if scm:
            # Prefer the SCM object owned by the DVC repo when there is one.
            if hasattr(self, "dvc"):
                self.scm = self.dvc.scm
            else:
                self.scm = Git(root)

        if dvc and hasattr(self, "scm"):
            self.scm.commit("init dvc")
Beispiel #16
0
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
    fill_value=FILL_VALUE,
    iso=False,
):
    """Yield one table row per entry of ``experiments``.

    Args:
        base_rev: revision the experiments are based on.  It labels the
            ``"baseline"`` entry, shortened to 7 characters when it is a SHA.
        experiments: mapping of revision -> result dict.  A result may hold
            a ``"data"`` dict (name, timestamp, metrics, params, checkpoint
            info, ...) and an ``"error"`` entry.
        metric_names: forwarded to ``_sort_column`` and ``_extend_row``.
        param_names: forwarded to ``_sort_column`` and ``_extend_row``.
        precision: forwarded to ``_extend_row``.
        sort_by: optional column to sort the experiments by.
        sort_order: ``"desc"`` sorts in reverse; any other value does not.
        fill_value: placeholder used for missing values.
        iso: forwarded to ``_format_time``.

    Yields:
        list: ``[name, rev, typ, created, parent, state, executor]`` after
        ``_extend_row`` has processed it for metrics and then for params.
    """
    from scmrepo.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        reverse = sort_order == "desc"
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, reverse
        )

    # True while the next checkpoint row would start a new checkpoint chain.
    new_checkpoint = True
    for i, (rev, results) in enumerate(experiments.items()):
        exp = results.get("data", {})
        if exp.get("running"):
            state = "Running"
        elif exp.get("queued"):
            state = "Queued"
        else:
            state = fill_value
        executor = exp.get("executor", fill_value)
        is_baseline = rev == "baseline"

        if is_baseline:
            name_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        else:
            name_rev = rev[:7]

        exp_name = exp.get("name", "")
        tip = exp.get("checkpoint_tip")

        parent_rev = exp.get("checkpoint_parent", "")
        parent_exp = experiments.get(parent_rev, {}).get("data", {})
        parent_tip = parent_exp.get("checkpoint_tip")

        parent = ""
        if is_baseline:
            typ = "baseline"
        elif tip:
            if tip == parent_tip:
                typ = (
                    "checkpoint_tip" if new_checkpoint else "checkpoint_commit"
                )
            elif parent_rev == base_rev:
                typ = "checkpoint_base"
            else:
                typ = "checkpoint_commit"
                parent = parent_rev[:7]
        elif i < len(experiments) - 1:
            typ = "branch_commit"
        else:
            typ = "branch_base"

        if not is_baseline:
            new_checkpoint = not (tip and tip == parent_tip)

        row = [
            exp_name,
            name_rev,
            typ,
            _format_time(exp.get("timestamp"), fill_value, iso),
            parent,
            state,
            executor,
        ]
        # Errored revisions use a distinct placeholder for their metrics and
        # params.  Keep it in a per-row local: rebinding ``fill_value`` here
        # would leak the error marker into every later row.
        row_fill = FILL_VALUE_ERRORED if results.get("error") else fill_value
        _extend_row(
            row,
            metric_names,
            exp.get("metrics", {}).items(),
            precision,
            fill_value=row_fill,
        )
        _extend_row(
            row,
            param_names,
            exp.get("params", {}).items(),
            precision,
            fill_value=row_fill,
        )

        yield row
Beispiel #17
0
def external_repo(url,
                  rev=None,
                  for_write=False,
                  cache_dir=None,
                  cache_types=None,
                  **kwargs):
    """Yield a ``Repo`` built on a cached clone of ``url``.

    This is a generator that yields exactly once; presumably it is wrapped
    as a context manager by a decorator outside this view (TODO confirm).

    Args:
        url: location of the external repository.
        rev: revision to use; falls back to ``refs/remotes/origin/HEAD``.
        for_write: when true the repo gets no explicit ``scm``/``rev`` and
            the clone at ``path`` is removed on exit.
        cache_dir: cache directory; defaults to ``_get_cache_dir(url)``.
        cache_types: stored as the cache ``type`` setting.
        **kwargs: extra keyword arguments for ``Repo``.  ``subrepos`` and
            ``uninitialized`` default to ``True`` when not supplied.

    Raises:
        NoRemoteInExternalRepoError: on ``NoRemoteError``.
        NoOutputInExternalRepoError: on ``OutputNotFoundError`` raised for
            this very repo.
        PathMissingError: on ``FileMissingError``.
    """
    from scmrepo.git import Git

    from dvc.config import NoRemoteError

    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    # Local HEAD points to the tip of whatever branch we first cloned from
    # (which may not be the default branch), use origin/HEAD here to get
    # the tip of the default branch
    rev = rev or "refs/remotes/origin/HEAD"

    cache_section = {
        "dir": cache_dir or _get_cache_dir(url),
        "type": cache_types,
    }
    cache_config = {"cache": cache_section}

    if os.path.isdir(url):
        config = _get_remote_config(url)
    else:
        config = {}
    config.update(cache_config)

    if for_write:
        root_dir, scm, repo_rev = path, None, None
    else:
        root_dir = os.path.realpath(path)
        scm = Git(root_dir)
        repo_rev = rev

    repo_kwargs = dict(
        root_dir=root_dir,
        url=url,
        scm=scm,
        rev=repo_rev,
        config=config,
        repo_factory=erepo_factory(url, cache_config),
        **kwargs,
    )
    repo_kwargs.setdefault("subrepos", True)
    repo_kwargs.setdefault("uninitialized", True)

    repo = Repo(**repo_kwargs)

    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        # Only translate errors that belong to this repo.
        if exc.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(exc.output, repo.root_dir,
                                          url) from exc
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)
Beispiel #18
0
def fix_exp_head(scm: "Git", ref: Optional[str]) -> Optional[str]:
    """Swap a leading ``HEAD`` in ``ref`` for ``EXEC_BASELINE`` when set.

    ``ref`` is split with ``Git.split_ref_pattern``.  If the name part is
    ``HEAD`` and ``scm`` has a value for the ``EXEC_BASELINE`` ref, the
    result is ``EXEC_BASELINE`` followed by the original tail.  In every
    other case, including an empty or ``None`` ref, ``ref`` is returned
    unchanged.
    """
    if not ref:
        return ref

    name, tail = Git.split_ref_pattern(ref)
    if name != "HEAD" or not scm.get_ref(EXEC_BASELINE):
        return ref
    return "".join((EXEC_BASELINE, tail))
Beispiel #19
0
def _collect_rows(
    base_rev,
    experiments,
    all_headers,
    metric_headers,
    param_headers,
    metric_names,
    param_names,
    deps_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
    fill_value=FILL_VALUE,
    iso=False,
):
    """Yield one table row per entry of ``experiments``.

    Each row starts as a dict with every key of ``all_headers`` set to the
    placeholder value, is then filled in, and is yielded as the list of its
    values.

    Args:
        base_rev: revision the experiments are based on.  It labels the
            ``"baseline"`` entry, shortened to 7 characters when it is a SHA.
        experiments: mapping of revision -> result dict.  A result may hold
            a ``"data"`` dict (name, timestamp, metrics, params, deps,
            checkpoint info, ...) and an ``"error"`` entry.
        all_headers: keys used to pre-populate each row.
        metric_headers: forwarded to ``_extend_row`` for metrics.
        param_headers: forwarded to ``_extend_row`` for params.
        metric_names: forwarded to ``_sort_column`` and ``_extend_row``.
        param_names: forwarded to ``_sort_column`` and ``_extend_row``.
        deps_names: dependency names; each gets a cell holding the first
            7 characters of its ``"hash"``, or ``None`` when absent.
        precision: forwarded to ``_extend_row``.
        sort_by: optional column to sort the experiments by.
        sort_order: ``"desc"`` sorts in reverse; any other value does not.
        fill_value: placeholder used for missing values; rows whose result
            has an ``"error"`` use ``FILL_VALUE_ERRORED`` instead.
        iso: forwarded to ``_format_time``.

    Yields:
        list: the values of the row dict, in its insertion order.
    """
    from scmrepo.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        reverse = sort_order == "desc"
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, reverse
        )

    # True while the next checkpoint row would start a new checkpoint chain.
    new_checkpoint = True
    for i, (rev, results) in enumerate(experiments.items()):
        # Per-row placeholder.  Rebinding ``fill_value`` itself would make
        # the error marker stick for every row after the first errored one.
        row_fill = FILL_VALUE_ERRORED if results.get("error") else fill_value
        row_dict = {k: row_fill for k in all_headers}

        exp = results.get("data", {})

        if exp.get("running"):
            state = "Running"
        elif exp.get("queued"):
            state = "Queued"
        else:
            state = row_fill

        is_baseline = rev == "baseline"

        if is_baseline:
            name_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        else:
            name_rev = rev[:7]

        tip = exp.get("checkpoint_tip")
        parent_rev = exp.get("checkpoint_parent", "")
        parent_exp = experiments.get(parent_rev, {}).get("data", {})
        parent_tip = parent_exp.get("checkpoint_tip")

        parent = ""
        if is_baseline:
            typ = "baseline"
        elif tip:
            if tip == parent_tip:
                typ = (
                    "checkpoint_tip" if new_checkpoint else "checkpoint_commit"
                )
            elif parent_rev == base_rev:
                typ = "checkpoint_base"
            else:
                typ = "checkpoint_commit"
                parent = parent_rev[:7]
        elif i < len(experiments) - 1:
            typ = "branch_commit"
        else:
            typ = "branch_base"

        if not is_baseline:
            new_checkpoint = not (tip and tip == parent_tip)

        row_dict["Experiment"] = exp.get("name", "")
        row_dict["rev"] = name_rev
        row_dict["typ"] = typ
        row_dict["Created"] = _format_time(
            exp.get("timestamp"), row_fill, iso
        )
        row_dict["parent"] = parent
        row_dict["State"] = state
        row_dict["Executor"] = exp.get("executor", row_fill)

        _extend_row(
            row_dict,
            metric_names,
            metric_headers,
            exp.get("metrics", {}).items(),
            precision,
            fill_value=row_fill,
        )
        _extend_row(
            row_dict,
            param_names,
            param_headers,
            exp.get("params", {}).items(),
            precision,
            fill_value=row_fill,
        )
        for dep in deps_names:
            hash_info = exp.get("deps", {}).get(dep, {}).get("hash")
            if hash_info is not None:
                hash_info = hash_info[:7]
            row_dict[dep] = hash_info
        yield list(row_dict.values())
Beispiel #20
0
    def setUp(self):
        """Run the parent set-up, then init Git in "." and commit the code.

        The new ``Git`` instance is kept as ``self.git``; ``self.CODE`` is
        passed to its ``add_commit`` with the message "add code".
        """
        from scmrepo.git import Git

        super().setUp()
        scm = Git.init(".")
        self.git = scm
        scm.add_commit(self.CODE, message="add code")