Exemple #1
0
def _clone_default_branch(url, rev, for_write=False):
    """Return a clean clone of ``url`` together with its shallow flag.

    A clone already recorded in ``CLONES`` is reused. It is refreshed unless
    ``rev`` is a SHA that the clone already contains: a shallow clone is
    unshallowed, a full clone is pulled. When no clone is recorded, a new one
    is made in a temporary directory and stored in ``CLONES``.

    Returns:
        A ``(clone_path, shallow)`` tuple.
    """
    from dvc.scm.git import Git

    clone_path, shallow = CLONES.get(url, (None, False))

    git = None
    try:
        if not clone_path:
            logger.debug("erepo: git clone '%s' to a temporary dir", url)
            clone_path = tempfile.mkdtemp("dvc-clone")

            # A tag or branch name (anything that is not a SHA) is first
            # attempted as a shallow clone, but never for writable clones.
            try_shallow = not for_write and rev and not Git.is_sha(rev)
            if try_shallow:
                from dvc.scm.base import CloneError

                try:
                    git = Git.clone(url, clone_path, shallow_branch=rev)
                except CloneError:
                    # Fall through to the full clone below.
                    pass
                else:
                    shallow = True
                    logger.debug(
                        "erepo: using shallow clone for branch '%s'", rev
                    )

            if not git:
                git = Git.clone(url, clone_path)
                shallow = False
            CLONES[url] = (clone_path, shallow)
        else:
            git = Git(clone_path)
            # Known SHAs never need a refresh; branches and tags might move.
            needs_refresh = not (Git.is_sha(rev) and git.has_rev(rev))
            if needs_refresh and shallow:
                # A rev missing from a shallow clone triggers a fallback to
                # a full (unshallowed) clone. Fetching specific SHAs is only
                # available in certain git versions, so working with a full
                # clone is easier/safer when several revs of one URL are
                # needed.
                logger.debug("erepo: unshallowing clone for '%s'", url)
                _unshallow(git)
                shallow = False
                CLONES[url] = (clone_path, False)
            elif needs_refresh:
                logger.debug("erepo: git pull '%s'", url)
                git.pull()
    finally:
        if git:
            git.close()

    return clone_path, shallow
Exemple #2
0
def _clone_default_branch(url, rev):
    """Return the path of a clean clone of ``url``, creating it if needed.

    A clone recorded in ``CLONES`` is reused and refreshed with ``git pull``
    unless ``rev`` is a SHA it already contains. Otherwise the repository is
    cloned into a new temporary directory, which is then recorded in
    ``CLONES``.
    """
    clone_path = CLONES.get(url)

    git = None
    try:
        if not clone_path:
            logger.debug("erepo: git clone %s to a temporary dir", url)
            clone_path = tempfile.mkdtemp("dvc-clone")
            git = Git.clone(url, clone_path)
            CLONES[url] = clone_path
        else:
            git = Git(clone_path)
            # Known SHAs need no pull; branches and tags might move.
            already_have_sha = Git.is_sha(rev) and git.has_rev(rev)
            if not already_have_sha:
                logger.debug("erepo: git pull %s", url)
                git.pull()
    finally:
        if git:
            git.close()

    return clone_path
Exemple #3
0
def _cached_clone(url, rev, for_write=False):
    """Clone an external git repo to a temporary directory.

    Args:
        url: URL of the repository; passed to ``_clone_default_branch``.
        rev: Revision to check out. ``None`` skips the checkout and resolves
            ``HEAD`` for the cache key instead.
        for_write: When true, the cache in ``CLONES`` is neither consulted
            nor updated, so the returned directory is not reused.

    Returns:
        Path to a local temporary directory holding a copy of the clone with
        the specified revision checked out.
    """
    # Fast path: a SHA is already a stable cache key, no clone needed.
    if not for_write and Git.is_sha(rev) and (url, rev) in CLONES:
        return CLONES[url, rev]

    clone_path = _clone_default_branch(url, rev)

    # Resolve the rev to a SHA, then release the handle right away.
    git = Git(clone_path)
    try:
        rev_sha = git.resolve_rev(rev or "HEAD")
    finally:
        git.close()

    if not for_write and (url, rev_sha) in CLONES:
        return CLONES[url, rev_sha]

    # Copy to a new dir to keep the clone clean
    repo_path = tempfile.mkdtemp("dvc-erepo")
    # logging interpolates with %-style placeholders; a "{}" placeholder
    # combined with an argument makes the record fail to format.
    logger.debug("erepo: making a copy of %s clone", url)
    copy_tree(clone_path, repo_path)

    # Check out the specified revision
    if rev is not None:
        _git_checkout(repo_path, rev)

    if not for_write:
        CLONES[url, rev_sha] = repo_path
    return repo_path
Exemple #4
0
def _show_experiments(all_experiments, console, precision=None):
    """Render all experiments as a rich table and print it to ``console``.

    The table has an "Experiment" column, then one right-justified column per
    metric name and one left-justified column per param name. Base revisions
    that are SHAs are shortened to seven characters before their rows are
    collected.
    """
    from rich.table import Table
    from dvc.scm.git import Git

    metric_names, param_names = _collect_names(all_experiments)

    table = Table()
    table.add_column("Experiment")
    for justify, names in (("right", metric_names), ("left", param_names)):
        for name in names:
            table.add_column(name, justify=justify)

    for base_rev, experiments in all_experiments.items():
        short_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        collected = _collect_rows(
            short_rev,
            experiments,
            metric_names,
            param_names,
            precision=precision,
        )
        # The second item of each pair is ignored by this plain renderer.
        for row, _ in collected:
            table.add_row(*row)

    console.print(table)
Exemple #5
0
def _show_experiments(all_experiments, console, precision=None):
    """Render all experiments as a styled rich table and print it.

    Rows alternate between two styles. The "Experiment", metric and param
    columns each get their own header style. Metric columns are
    right-justified and param columns left-justified. Each row is added with
    the style yielded alongside it by ``_collect_rows``.
    """
    from rich.table import Table
    from dvc.scm.git import Git

    metric_names, param_names = _collect_names(all_experiments)

    table = Table(row_styles=["white", "bright_white"])
    table.add_column("Experiment", header_style="black on grey93")

    column_groups = (
        (metric_names, "right", "black on cornsilk1"),
        (param_names, "left", "black on light_cyan1"),
    )
    for names, justify, header_style in column_groups:
        for name in names:
            table.add_column(
                name, justify=justify, header_style=header_style
            )

    for base_rev, experiments in all_experiments.items():
        short_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        collected = _collect_rows(
            short_rev,
            experiments,
            metric_names,
            param_names,
            precision=precision,
        )
        for row, style in collected:
            table.add_row(*row, style=style)

    console.print(table)
Exemple #6
0
def ls(repo, *args, rev=None, git_remote=None, all_=False, **kwargs):
    """List experiment names grouped by baseline revision.

    With ``rev`` given, it is resolved against ``repo.scm``. A resolution
    failure is tolerated only when ``git_remote`` is set and ``rev`` is a
    SHA. Without ``rev`` and without ``all_``, the current revision of
    ``repo.scm`` is used. With a revision, the names are grouped under it;
    otherwise, with ``all_``, they are grouped under each ref's
    ``baseline_sha``. Refs come from the remote when ``git_remote`` is set
    and from the local repo otherwise.

    Returns:
        A ``defaultdict(list)`` mapping revision -> list of experiment names.
    """
    from dvc.scm import RevError, resolve_rev
    from dvc.scm.git import Git

    if rev:
        try:
            rev = resolve_rev(repo.scm, rev)
        except RevError:
            # This could be a remote rev that has not been fetched yet
            maybe_unfetched = git_remote and Git.is_sha(rev)
            if not maybe_unfetched:
                raise
    elif not all_:
        rev = repo.scm.get_rev()

    results = defaultdict(list)

    if rev:
        refs = (
            remote_exp_refs_by_baseline(repo.scm, git_remote, rev)
            if git_remote
            else exp_refs_by_baseline(repo.scm, rev)
        )
        for ref_info in refs:
            results[rev].append(ref_info.name)
    elif all_:
        refs = (
            remote_exp_refs(repo.scm, git_remote)
            if git_remote
            else exp_refs(repo.scm)
        )
        for ref_info in refs:
            results[ref_info.baseline_sha].append(ref_info.name)

    return results
Exemple #7
0
 def _scm_checkout(self, rev):
     """Hard-reset the repo and check out ``rev``.

     A detached HEAD is first moved back via
     ``self._checkout_default_branch()``. The repo is pulled unless ``rev``
     is a SHA that is already present.
     """
     self.scm.repo.git.reset(hard=True)
     if self.scm.repo.head.is_detached:
         self._checkout_default_branch()

     sha_already_present = Git.is_sha(rev) and self.scm.has_rev(rev)
     if not sha_already_present:
         self.scm.pull()

     logger.debug("Checking out experiment commit '%s'", rev)
     self.scm.checkout(rev)
Exemple #8
0
 def _scm_checkout(self, rev):
     """Hard-reset the repo and check out ``rev``.

     A detached HEAD is first moved onto the first entry of
     ``self.scm.repo.heads``. The repo is pulled unless ``rev`` is a SHA
     that is already present.
     """
     self.scm.repo.git.reset(hard=True)
     if self.scm.repo.head.is_detached:
         # switch back to default branch
         first_head = self.scm.repo.heads[0]
         first_head.checkout()

     sha_already_present = Git.is_sha(rev) and self.scm.has_rev(rev)
     if not sha_already_present:
         self.scm.pull()

     logger.debug("Checking out base experiment commit '%s'", rev)
     self.scm.checkout(rev)
Exemple #9
0
def _show_experiments(all_experiments, console, **kwargs):
    """Render all experiments as a rich table and print it to ``console``.

    The ``include_metrics``, ``exclude_metrics``, ``include_params`` and
    ``exclude_params`` keyword arguments are popped, parsed with
    ``_parse_list`` and forwarded to ``_collect_names``. Every remaining
    keyword argument is forwarded to ``_collect_rows``. A "Created" column is
    added unless ``no_timestamp`` is truthy.
    """
    from rich.table import Table

    from dvc.scm.git import Git

    filter_keys = (
        "include_metrics",
        "exclude_metrics",
        "include_params",
        "exclude_params",
    )
    name_filters = {
        key: _parse_list(kwargs.pop(key, [])) for key in filter_keys
    }
    metric_names, param_names = _collect_names(
        all_experiments, **name_filters
    )

    table = Table()
    table.add_column("Experiment", no_wrap=True)
    if not kwargs.get("no_timestamp", False):
        table.add_column("Created")
    for metric in metric_names:
        table.add_column(metric, justify="right", no_wrap=True)
    for param in param_names:
        table.add_column(param, justify="left")

    for base_rev, experiments in all_experiments.items():
        short_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        collected = _collect_rows(
            short_rev,
            experiments,
            metric_names,
            param_names,
            **kwargs,
        )
        # The second item of each pair is ignored by this renderer.
        for row, _ in collected:
            table.add_row(*row)

    console.print(table)
Exemple #10
0
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
):
    """Yield one flat row per experiment of a single baseline.

    Each row starts with
    ``[name, short rev, queued, type, formatted timestamp, parent]`` and is
    then extended by ``_extend_row`` for the metrics and for the params.

    ``type`` is one of ``"baseline"``, ``"checkpoint_tip"``,
    ``"checkpoint_commit"``, ``"checkpoint_base"``, ``"branch_commit"`` or
    ``"branch_base"``. When ``sort_by`` is given, the experiments are
    reordered through ``_sort_exp`` first, descending if ``sort_order`` is
    ``"desc"``.
    """
    from dvc.scm.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, sort_order == "desc"
        )

    # True while the next checkpoint row would open a new checkpoint run.
    starts_new_run = True
    for index, (rev, exp) in enumerate(experiments.items()):
        queued = str(exp.get("queued") or "")
        exp_name = exp.get("name", "")
        tip = exp.get("checkpoint_tip")

        parent_rev = exp.get("checkpoint_parent", "")
        parent_tip = experiments.get(parent_rev, {}).get("checkpoint_tip")
        # This row and its checkpoint parent share the same tip.
        same_run_as_parent = bool(tip) and tip == parent_tip

        parent = ""
        if rev == "baseline":
            name_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
            typ = "baseline"
        else:
            name_rev = rev[:7]
            if same_run_as_parent:
                typ = (
                    "checkpoint_tip" if starts_new_run else "checkpoint_commit"
                )
            elif tip and parent_rev == base_rev:
                typ = "checkpoint_base"
            elif tip:
                typ = "checkpoint_commit"
                parent = parent_rev[:7]
            elif index < len(experiments) - 1:
                typ = "branch_commit"
            else:
                typ = "branch_base"
            # The baseline row never changes the run state.
            starts_new_run = not same_run_as_parent

        row = [
            exp_name,
            name_rev,
            queued,
            typ,
            _format_time(exp.get("timestamp")),
            parent,
        ]
        for names, section in ((metric_names, "metrics"), (param_names, "params")):
            _extend_row(row, names, exp.get(section, {}).items(), precision)

        yield row
Exemple #11
0
 def _scm_checkout(self, rev):
     """Hard-reset the repo and check out ``rev``.

     Everything is fetched first (``fetch(all=True)``) unless ``rev`` is a
     SHA that is already present.
     """
     self.scm.repo.git.reset(hard=True)

     sha_already_present = Git.is_sha(rev) and self.scm.has_rev(rev)
     if not sha_already_present:
         self.scm.fetch(all=True)

     logger.debug("Checking out base experiment commit '%s'", rev)
     self.scm.checkout(rev)
Exemple #12
0
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    no_timestamp=False,
    sort_by=None,
    sort_order=None,
):
    """Yield ``(row, style)`` pairs for the experiments of one baseline.

    The first cell of a row is the experiment label. For the baseline it is
    the name (or the shortened base rev) and the style is ``"bold"``. For
    other rows it is a tree glyph followed by a ``*`` marker for queued
    experiments, the name (or the shortened rev) and, for some checkpoints,
    the shortened parent rev in parentheses; the style is ``None``. A
    formatted timestamp follows unless ``no_timestamp`` is set, and then
    ``_extend_row`` appends the metrics and the params.

    Raises:
        InvalidArgumentError: ``sort_by`` is neither a metric name nor a
            param name.
    """
    if sort_by:
        for sort_type, names in (
            ("metrics", metric_names),
            ("params", param_names),
        ):
            if sort_by in names:
                break
        else:
            raise InvalidArgumentError(f"Unknown sort column '{sort_by}'")
        experiments = _sort_exp(
            experiments, sort_by, sort_type, sort_order == "desc"
        )

    # True while the next checkpoint row would open a new checkpoint run.
    starts_new_run = True
    for position, (rev, exp) in enumerate(experiments.items()):
        queued = "*" if exp.get("queued", False) else ""
        tip = exp.get("checkpoint_tip")

        if rev == "baseline":
            short_base = base_rev[:7] if Git.is_sha(base_rev) else base_rev
            name = exp.get("name", short_base)
            label = f"{name}"
            style = "bold"
        else:
            style = None
            parent = ""
            if not tip:
                # Plain experiment: the last row gets the closing glyph.
                tree = "├──" if position < len(experiments) - 1 else "└──"
                starts_new_run = True
            else:
                parent_rev = exp.get("checkpoint_parent", "")
                parent_tip = experiments.get(parent_rev, {}).get(
                    "checkpoint_tip"
                )
                if tip == parent_tip:
                    tree = "│ ╓" if starts_new_run else "│ ╟"
                    starts_new_run = False
                elif parent_rev == base_rev:
                    tree = "├─╨"
                    starts_new_run = True
                else:
                    tree = "│ ╟"
                    parent = f" ({parent_rev[:7]})"
                    starts_new_run = True
            name = exp.get("name", rev[:7])
            label = f"{tree} {queued}{name}{parent}"

        row = [label]
        if not no_timestamp:
            row.append(_format_time(exp.get("timestamp")))

        for names, section in ((metric_names, "metrics"), (param_names, "params")):
            _extend_row(row, names, exp.get(section, {}).items(), precision)

        yield row, style
Exemple #13
0
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
    fill_value=FILL_VALUE,
    iso=False,
):
    """Yield one flat row per experiment of a single baseline.

    Args:
        base_rev: Revision the experiments are based on. It is shortened to
            seven characters for display when it is a SHA.
        experiments: Mapping of rev -> result dict. The experiment fields
            are read from each result's ``"data"`` entry, and a truthy
            ``"error"`` entry marks the result as errored.
        metric_names: Metric names, passed to ``_sort_column`` and
            ``_extend_row``.
        param_names: Param names, passed to ``_sort_column`` and
            ``_extend_row``.
        precision: Passed through to ``_extend_row``.
        sort_by: Optional column to sort by (resolved via ``_sort_column``).
        sort_order: ``"desc"`` sorts in reverse; anything else does not.
        fill_value: Placeholder for the state and executor cells when they
            have no value. It is also passed to ``_format_time`` and, for
            rows without an error, to ``_extend_row``.
        iso: Passed through to ``_format_time``.

    Yields:
        ``[name, short rev, type, formatted timestamp, parent, state,
        executor]`` extended by ``_extend_row`` for the metrics and for the
        params. ``type`` is one of ``"baseline"``, ``"checkpoint_tip"``,
        ``"checkpoint_commit"``, ``"checkpoint_base"``, ``"branch_commit"``
        or ``"branch_base"``.
    """
    from dvc.scm.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(sort_by, metric_names,
                                                       param_names)
        reverse = sort_order == "desc"
        experiments = _sort_exp(experiments, sort_path, sort_name, sort_type,
                                reverse)

    # True while the next checkpoint row would open a new checkpoint run.
    new_checkpoint = True
    for i, (rev, results) in enumerate(experiments.items()):
        exp = results.get("data", {})
        if exp.get("running"):
            state = "Running"
        elif exp.get("queued"):
            state = "Queued"
        else:
            state = fill_value
        executor = exp.get("executor", fill_value)
        is_baseline = rev == "baseline"

        if is_baseline:
            name_rev = base_rev[:7] if Git.is_sha(base_rev) else base_rev
        else:
            name_rev = rev[:7]

        exp_name = exp.get("name", "")
        tip = exp.get("checkpoint_tip")

        parent_rev = exp.get("checkpoint_parent", "")
        parent_exp = experiments.get(parent_rev, {}).get("data", {})
        parent_tip = parent_exp.get("checkpoint_tip")

        parent = ""
        if is_baseline:
            typ = "baseline"
        elif tip:
            if tip == parent_tip:
                typ = ("checkpoint_tip"
                       if new_checkpoint else "checkpoint_commit")
            elif parent_rev == base_rev:
                typ = "checkpoint_base"
            else:
                typ = "checkpoint_commit"
                parent = parent_rev[:7]
        elif i < len(experiments) - 1:
            typ = "branch_commit"
        else:
            typ = "branch_base"

        # The baseline row never changes the checkpoint-run state.
        if not is_baseline:
            new_checkpoint = not (tip and tip == parent_tip)

        row = [
            exp_name,
            name_rev,
            typ,
            _format_time(exp.get("timestamp"), fill_value, iso),
            parent,
            state,
            executor,
        ]
        # Use a per-row placeholder. Reassigning `fill_value` itself would
        # leak the errored marker into every row after the first error.
        row_fill_value = (
            FILL_VALUE_ERRORED if results.get("error") else fill_value
        )
        _extend_row(
            row,
            metric_names,
            exp.get("metrics", {}).items(),
            precision,
            fill_value=row_fill_value,
        )
        _extend_row(
            row,
            param_names,
            exp.get("params", {}).items(),
            precision,
            fill_value=row_fill_value,
        )

        yield row
Exemple #14
0
def _collect_rows(
    base_rev,
    experiments,
    metric_names,
    param_names,
    precision=DEFAULT_PRECISION,
    sort_by=None,
    sort_order=None,
):
    """Yield one row per experiment of a single baseline.

    Each row starts with ``[text, formatted timestamp, is_baseline]`` and is
    then extended by ``_extend_row`` for the metrics and for the params. For
    the baseline, ``text`` is the experiment name or the shortened base rev.
    For other rows it is a tree glyph followed by a ``*`` marker for queued
    experiments, the name (or the shortened rev) and, for some checkpoints,
    the shortened parent rev in parentheses.
    """
    from dvc.scm.git import Git

    if sort_by:
        sort_path, sort_name, sort_type = _sort_column(
            sort_by, metric_names, param_names
        )
        experiments = _sort_exp(
            experiments, sort_path, sort_name, sort_type, sort_order == "desc"
        )

    # True while the next checkpoint row would open a new checkpoint run.
    fresh_run = True
    for idx, (rev, exp) in enumerate(experiments.items()):
        queued = "*" if exp.get("queued", False) else ""
        tip = exp.get("checkpoint_tip")

        if rev == "baseline":
            fallback_name = base_rev[:7] if Git.is_sha(base_rev) else base_rev
            text = exp.get("name", fallback_name)
        else:
            parent = ""
            if not tip:
                # Plain experiment: the last row gets the closing glyph.
                tree = "├──" if idx < len(experiments) - 1 else "└──"
                fresh_run = True
            else:
                parent_rev = exp.get("checkpoint_parent", "")
                parent_tip = experiments.get(parent_rev, {}).get(
                    "checkpoint_tip"
                )
                if tip == parent_tip:
                    tree = "│ ╓" if fresh_run else "│ ╟"
                    fresh_run = False
                elif parent_rev == base_rev:
                    tree = "├─╨"
                    fresh_run = True
                else:
                    tree = "│ ╟"
                    parent = f" ({parent_rev[:7]})"
                    fresh_run = True
            name = exp.get("name", rev[:7])
            text = f"{tree} {queued}{name}{parent}"

        row = [text, _format_time(exp.get("timestamp")), rev == "baseline"]
        for names, section in ((metric_names, "metrics"), (param_names, "params")):
            _extend_row(row, names, exp.get(section, {}).items(), precision)

        yield row