예제 #1
0
파일: diff.py 프로젝트: vishalbelsare/dvc
def diff(repo, *args, a_rev=None, b_rev=None, param_deps=False, **kwargs):
    """Compare the metrics and params of two experiment revisions.

    Args:
        repo: Repo object; only ``repo.scm`` is used directly here.
        *args: Accepted for interface compatibility; not used.
        a_rev: "Old" revision. When falsy, ``HEAD`` (passed through
            ``fix_exp_head``) is used instead.
        b_rev: "New" revision. When falsy, the ``"workspace"`` is used.
        param_deps: Forwarded to ``_collect_experiment_commit``.
        **kwargs: Only ``all`` is consumed; when truthy, unchanged entries
            are included via ``with_unchanged``.

    Returns:
        ``{}`` if the repository has no commits, otherwise a dict with the
        keys ``"metrics"`` and ``"params"``, each holding the ``_diff``
        result for that section.
    """
    from dvc.repo.experiments.show import _collect_experiment_commit
    from dvc.scm import resolve_rev

    if repo.scm.no_commits:
        return {}

    # Work out which revision plays the "old" side, then collect it.
    if a_rev:
        a_rev = fix_exp_head(repo.scm, a_rev)
        old_rev = resolve_rev(repo.scm, a_rev)
    else:
        old_rev = fix_exp_head(repo.scm, "HEAD")
    old = _collect_experiment_commit(repo, old_rev, param_deps=param_deps)

    # Same for the "new" side; without b_rev we compare to the workspace.
    if b_rev:
        b_rev = fix_exp_head(repo.scm, b_rev)
        new_rev = resolve_rev(repo.scm, b_rev)
    else:
        new_rev = "workspace"
    new = _collect_experiment_commit(repo, new_rev, param_deps=param_deps)

    with_unchanged = kwargs.pop("all", False)

    result = {}
    for section in ("metrics", "params"):
        old_section = old.get("data", {}).get(section, {})
        new_section = new.get("data", {}).get(section, {})
        result[section] = _diff(
            format_dict(old_section),
            format_dict(new_section),
            with_unchanged=with_unchanged,
        )
    return result
예제 #2
0
def test_fix_exp_head(tmp_dir, scm, tail):
    """Check how ``fix_exp_head`` rewrites refs before/after a baseline is set.

    ``tail`` is a suffix appended to the ref name (supplied by the test
    parametrization, which is not visible here).
    """
    from dvc.repo.experiments.base import EXEC_BASELINE
    from dvc.repo.experiments.utils import fix_exp_head

    head_ref = "HEAD" + tail
    other_ref = "foo" + tail

    # Before EXEC_BASELINE is set, a HEAD-based ref comes back unchanged.
    assert fix_exp_head(scm, head_ref) == head_ref

    scm.set_ref(EXEC_BASELINE, "refs/heads/master")

    # With the baseline ref in place, HEAD is swapped for EXEC_BASELINE and
    # the tail is kept; refs not based on HEAD are left alone.
    assert fix_exp_head(scm, head_ref) == EXEC_BASELINE + tail
    assert fix_exp_head(scm, other_ref) == other_ref
예제 #3
0
파일: diff.py 프로젝트: vijay-pinjala/dvc
def diff(repo, *args, a_rev=None, b_rev=None, **kwargs):
    """Diff the metrics found at two revisions.

    Args:
        repo: Repo object; ``repo.scm`` is consulted for commit state.
        *args: Forwarded to ``_get_metrics``.
        a_rev: "Old" revision; defaults to ``"HEAD"`` when falsy.
        b_rev: "New" revision; falls back to ``"workspace"`` when
            ``fix_exp_head`` returns a falsy value for it.
        **kwargs: ``all`` is popped and used as ``with_unchanged``; the rest
            is forwarded to ``_get_metrics``.

    Returns:
        ``{}`` if the repository has no commits, otherwise the result of
        ``_diff`` over the formatted metrics of both revisions.
    """
    if repo.scm.no_commits:
        return {}

    with_unchanged = kwargs.pop("all", False)

    old_rev = fix_exp_head(repo.scm, a_rev or "HEAD")
    new_rev = fix_exp_head(repo.scm, b_rev) or "workspace"

    collected = _get_metrics(repo, *args, **kwargs, revs=[old_rev, new_rev])
    old_metrics = collected.get(old_rev, {})
    new_metrics = collected.get(new_rev, {})

    return _diff(
        format_dict(old_metrics),
        format_dict(new_metrics),
        with_unchanged=with_unchanged,
    )
예제 #4
0
파일: diff.py 프로젝트: vishalbelsare/dvc
def _revisions(repo, revs, experiment):
    revisions = revs or []
    if experiment and len(revisions) == 1:
        baseline = repo.experiments.get_baseline(revisions[0])
        if baseline:
            revisions.append(baseline[:7])
    if len(revisions) <= 1:
        if len(revisions) == 0 and repo.scm.is_dirty():
            revisions.append(fix_exp_head(repo.scm, "HEAD"))
        revisions.append("workspace")
    return revisions
예제 #5
0
파일: scm.py 프로젝트: nik123/dvc
def iter_revs(
    scm: "Git",
    head_revs: Optional[List[str]] = None,
    num: int = 1,
    all_branches: bool = False,
    all_tags: bool = False,
    all_commits: bool = False,
    all_experiments: bool = False,
) -> Mapping[str, List[str]]:
    """Collect the requested revisions and group them with ``group_by``.

    Args:
        scm: Git SCM object.
        head_revs: Starting revisions. Each one is added as-is, followed by
            its ancestors ``rev~1``, ``rev~2``, ... up to ``num`` entries.
        num: How many commits to take per head rev. ``-1`` never matches the
            loop counter, so the walk only stops once an ancestor fails to
            resolve.
        all_branches: Add ``scm.list_branches()`` (ignored with
            ``all_commits``).
        all_tags: Add ``scm.list_tags()`` (ignored with ``all_commits``).
        all_commits: Add ``scm.list_all_commits()``.
        all_experiments: Add the experiment commits from ``exp_commits``.

    Returns:
        ``{}`` when nothing was requested, otherwise the collected revisions
        grouped by ``resolve_rev(scm, rev)``.

    Raises:
        InvalidArgumentError: If ``num`` is below 1 and not ``-1``.
    """
    from dvc.repo.experiments.utils import fix_exp_head

    if num < 1 and num != -1:
        raise InvalidArgumentError(f"Invalid number of commits '{num}'")

    nothing_requested = not (
        head_revs
        or all_branches
        or all_tags
        or all_commits
        or all_experiments
    )
    if nothing_requested:
        return {}

    collected = []
    for start in head_revs or []:
        collected.append(start)
        depth = 1
        # Walk back through the ancestors until `num` entries were gathered
        # or the ancestor no longer resolves.
        while depth != num:
            try:
                ancestor = fix_exp_head(scm, f"{start}~{depth}")
                assert ancestor
                collected.append(resolve_rev(scm, ancestor))
            except RevError:
                break
            depth += 1

    if all_commits:
        collected.extend(scm.list_all_commits())
    if all_branches and not all_commits:
        collected.extend(scm.list_branches())
    if all_tags and not all_commits:
        collected.extend(scm.list_tags())

    if all_experiments:
        from dvc.repo.experiments.utils import exp_commits

        collected.extend(exp_commits(scm))

    return group_by(partial(resolve_rev, scm), collected)
예제 #6
0
파일: scm.py 프로젝트: jhhuh/dvc
def resolve_rev(scm: "Git", rev: str) -> str:
    """Resolve ``rev``, falling back to an experiment-name lookup.

    The revision is first passed through ``fix_exp_head`` and handed to
    ``scm.resolve_rev``. If that fails and ``rev`` is not a full ``refs/``
    path, ``rev`` is looked up as an experiment name via ``resolve_name``.

    Raises:
        RevError: If the experiment name is ambiguous, or if neither lookup
            yields a result (carrying the message of the original failure).
    """
    from scmrepo.exceptions import RevError as InternalRevError

    from dvc.repo.experiments.utils import fix_exp_head

    try:
        return scm.resolve_rev(fix_exp_head(scm, rev))
    except InternalRevError as exc:
        # Full ref paths cannot be experiment names; nothing more to try.
        if rev.startswith("refs/"):
            raise RevError(str(exc))

        # `scm` will only resolve git branch and tag names,
        # if rev is not a sha it may be an abbreviated experiment name
        from dvc.repo.experiments.utils import (
            AmbiguousExpRefInfo,
            resolve_name,
        )

        try:
            exp_ref = resolve_name(scm, rev).get(rev)
        except AmbiguousExpRefInfo:
            raise RevError(f"ambiguous Git revision '{rev}'")

        if exp_ref:
            return scm.get_ref(str(exp_ref))

        raise RevError(str(exc))
예제 #7
0
def diff(self, a_rev="HEAD", b_rev=None, targets=None):
    """
    Compare tracked paths between two revisions.

    With the defaults, the workspace is compared against the last commit.
    Unlike `git diff`, DVC has no notion of an `index`; the interface is
    kept the same anyway, so `dvc diff` behaves like `dvc diff HEAD`.

    Returns an empty dict when the repository has no commits or when no
    category contains any entry; otherwise a dict with the keys "added",
    "deleted", "modified", "renamed" and "not in cache".

    Raises PathMissingError for a target that is missing in both revisions.
    """
    if self.scm.no_commits:
        return {}

    from dvc.fs.repo import RepoFileSystem

    repo_fs = RepoFileSystem(self)

    a_rev = fix_exp_head(self.scm, a_rev)
    if b_rev:
        b_rev = fix_exp_head(self.scm, b_rev)
    else:
        b_rev = "workspace"

    checksums_by_rev = {}
    missing_by_rev = {}
    for rev in self.brancher(revs=[a_rev, b_rev]):
        # brancher always yields the workspace; its paths/checksums are
        # only needed when no b_rev was given.
        if rev == "workspace" and rev != b_rev:
            continue

        path_infos = None
        if targets is not None:
            # turn targets into path_infos and remember the missing ones
            path_infos, missing_by_rev[rev] = _targets_to_path_infos(
                repo_fs, targets
            )

        checksums_by_rev[rev] = _paths_checksums(self, path_infos)

    if targets is not None:
        # a target missing on both sides is an error
        missing_in_both = set(missing_by_rev[a_rev]) & set(
            missing_by_rev[b_rev]
        )
        for target in missing_in_both:
            raise PathMissingError(target, self)

    old = checksums_by_rev[a_rev]
    new = checksums_by_rev[b_rev]

    # Compare the path sets of the old and the new side.
    old_paths = set(old)
    new_paths = set(new)
    added = sorted(new_paths - old_paths)
    deleted_or_missing = old_paths - new_paths

    # "missing" only makes sense when diffing the local workspace against
    # a commit.
    missing = []
    if b_rev == "workspace":
        missing = sorted(_filter_missing(repo_fs, deleted_or_missing))

    deleted = sorted(deleted_or_missing - set(missing))
    modified = sorted(old_paths & new_paths)

    # A file that was both changed and renamed shows up as one deleted and
    # one added record; detecting that would need a rolling/chunking hash.
    renamed = _calculate_renamed(new, old, added, deleted)

    for item in renamed:
        added.remove(item["path"]["new"])
        deleted.remove(item["path"]["old"])

    added_entries = [{"path": path, "hash": new[path]} for path in added]
    deleted_entries = [{"path": path, "hash": old[path]} for path in deleted]
    modified_entries = [
        {"path": path, "hash": {"old": old[path], "new": new[path]}}
        for path in modified
        if old[path] != new[path]
    ]
    missing_entries = [{"path": path, "hash": old[path]} for path in missing]

    ret = {
        "added": added_entries,
        "deleted": deleted_entries,
        "modified": modified_entries,
        "renamed": renamed,
        "not in cache": missing_entries,
    }

    if any(ret.values()):
        return ret
    return {}
예제 #8
0
def show(
    repo,
    all_branches=False,
    all_tags=False,
    revs=None,
    all_commits=False,
    sha_only=False,
    num=1,
    param_deps=False,
    onerror: Optional[Callable] = None,
):
    """Collect experiment data for the selected baseline revisions.

    Args:
        repo: Repo object (``scm``, ``brancher`` and ``experiments`` are
            used).
        all_branches, all_tags, all_commits: Forwarded to ``repo.brancher``.
        revs: Revisions to show. When ``None``, the last ``num`` commits
            starting at ``HEAD`` are used (stopping early at the first one
            that fails to resolve).
        sha_only: Forwarded to the collection helpers.
        num: Number of commits to walk back from ``HEAD`` when ``revs`` is
            ``None``.
        param_deps: Forwarded to the collection helpers.
        onerror: Error callback forwarded to the collection helpers;
            defaults to ``onerror_collect``.

    Returns:
        A mapping ``rev -> OrderedDict``. Each per-revision dict holds the
        ``"baseline"`` entry first, then whatever
        ``_collect_experiment_branch`` adds for each experiment ref, then
        the stashed experiments keyed by their stash revision.

    Raises:
        InvalidArgumentError: If ``num`` is smaller than 1.
    """
    if onerror is None:
        onerror = onerror_collect

    res: Dict[str, Dict] = defaultdict(OrderedDict)

    if num < 1:
        raise InvalidArgumentError(f"Invalid number of commits '{num}'")

    if revs is None:
        from dvc.scm import RevError, resolve_rev

        revs = []
        for n in range(num):
            try:
                head = fix_exp_head(repo.scm, f"HEAD~{n}")
                assert head
                revs.append(resolve_rev(repo.scm, head))
            except RevError:
                break

    # Ordered, de-duplicated set of the revisions yielded by brancher.
    revs = OrderedDict(
        (rev, None)
        for rev in repo.brancher(
            revs=revs,
            all_branches=all_branches,
            all_tags=all_tags,
            all_commits=all_commits,
            sha_only=True,
        )
    )

    running = repo.experiments.get_running_exps()

    for rev in revs:
        res[rev]["baseline"] = _collect_experiment_commit(
            repo,
            rev,
            sha_only=sha_only,
            param_deps=param_deps,
            running=running,
            onerror=onerror,
        )

        if rev == "workspace":
            continue

        ref_info = ExpRefInfo(baseline_sha=rev)
        commits = [
            (ref, repo.scm.resolve_commit(ref))
            for ref in repo.scm.iter_refs(base=str(ref_info))
        ]
        # Newest experiment refs first.
        for exp_ref, _ in sorted(
            commits, key=lambda x: x[1].commit_time, reverse=True
        ):
            ref_info = ExpRefInfo.from_ref(exp_ref)
            assert ref_info.baseline_sha == rev
            _collect_experiment_branch(
                res[rev],
                repo,
                exp_ref,
                rev,
                sha_only=sha_only,
                param_deps=param_deps,
                running=running,
                onerror=onerror,
            )

    # Collect stashed experiments once, after all baselines were handled.
    # The loop already filters on `baseline_rev in revs`, so running it per
    # revision only repeated the same work and could insert stash entries
    # into a revision's dict before its "baseline" key existed.
    for stash_rev, entry in repo.experiments.stash_revs.items():
        if entry.baseline_rev not in revs:
            continue
        # Skip stash entries that are running and flagged as "last".
        if stash_rev in running and running[stash_rev].get("last"):
            continue
        experiment = _collect_experiment_commit(
            repo,
            stash_rev,
            sha_only=sha_only,
            stash=stash_rev not in running,
            param_deps=param_deps,
            running=running,
            onerror=onerror,
        )
        res[entry.baseline_rev][stash_rev] = experiment

    return res
예제 #9
0
def show(
    repo,
    all_branches=False,
    all_tags=False,
    revs=None,
    all_commits=False,
    sha_only=False,
    num=1,
    param_deps=False,
):
    """Collect experiment data for the selected baseline revisions.

    When ``revs`` is ``None``, the last ``num`` commits starting at ``HEAD``
    are used, stopping early at the first one that raises ``SCMError``.
    The revisions are then expanded through ``repo.brancher``.

    Returns a mapping ``rev -> OrderedDict`` holding the ``"baseline"``
    entry, whatever ``_collect_experiment_branch`` adds per experiment ref,
    and the stashed experiments keyed by stash revision.

    Raises ``InvalidArgumentError`` if ``num`` is smaller than 1.
    """
    res = defaultdict(OrderedDict)

    if num < 1:
        raise InvalidArgumentError(f"Invalid number of commits '{num}'")

    if revs is None:
        revs = []
        for depth in range(num):
            try:
                head = fix_exp_head(repo.scm, f"HEAD~{depth}")
                revs.append(repo.scm.resolve_rev(head))
            except SCMError:
                break

    # Ordered, de-duplicated set of the revisions yielded by brancher.
    revs = OrderedDict.fromkeys(
        repo.brancher(
            revs=revs,
            all_branches=all_branches,
            all_tags=all_tags,
            all_commits=all_commits,
            sha_only=True,
        )
    )

    for rev in revs:
        res[rev]["baseline"] = _collect_experiment_commit(
            repo, rev, sha_only=sha_only, param_deps=param_deps
        )

        if rev != "workspace":
            base = str(ExpRefInfo(baseline_sha=rev))
            exp_commits = [
                (ref, repo.scm.resolve_commit(ref))
                for ref in repo.scm.iter_refs(base=base)
            ]
            # newest experiment refs first
            exp_commits.sort(key=lambda pair: pair[1].commit_time, reverse=True)
            for exp_ref, _ in exp_commits:
                assert ExpRefInfo.from_ref(exp_ref).baseline_sha == rev
                _collect_experiment_branch(
                    res[rev],
                    repo,
                    exp_ref,
                    rev,
                    sha_only=sha_only,
                    param_deps=param_deps,
                )

    # collect queued (not yet reproduced) experiments
    for stash_rev, entry in repo.experiments.stash_revs.items():
        baseline = entry.baseline_rev
        if baseline not in revs:
            continue
        res[baseline][stash_rev] = _collect_experiment_commit(
            repo, stash_rev, stash=True, param_deps=param_deps
        )

    return res