Example #1
0
File: base.py Project: simonlsk/dvc
    def safe_remove(self, path_info, force=False):
        """Remove `path_info` from the tree, asking first when needed.

        Nothing happens if the path does not exist. Unless `force` is set or
        `self.already_cached(path_info)` is true, the user must confirm.

        Raises:
            ConfirmRemoveError: the user declined the removal.
        """
        if not self.tree.exists(path_info):
            return

        needs_confirmation = not (force or self.already_cached(path_info))
        if needs_confirmation:
            question = (
                "file '{}' is going to be removed."
                " Are you sure you want to proceed?"
            ).format(str(path_info))
            if not prompt.confirm(question):
                raise ConfirmRemoveError(str(path_info))

        self.tree.remove(path_info)
Example #2
0
    def _collect_used_dir_cache(self, remote=None, force=False, jobs=None):
        """Build a NamedCache with one entry per file of this directory.

        If the directory's cache entry has changed, a pull from the remote is
        attempted first (failures are only logged at debug level). If the
        entry is still changed afterwards, the user is asked whether to
        continue without it, unless `force` is set; in that case an empty
        NamedCache is returned.

        Example:

            After running:

            $ echo "foo" > directory/foo
            $ echo "bar" > directory/bar
            $ dvc add directory

            the result is equivalent to:

            nc = NamedCache()
            nc.add(self.scheme, 'c157a79031e1', 'directory/foo')
            nc.add(self.scheme, 'd3b07384d113', 'directory/bar')

        Raises:
            CollectCacheError: the directory cache is missing and the user
                declined to continue.
        """
        used = NamedCache()

        if self.cache.changed_cache_file(self.checksum):
            # Best effort: a failed pull is handled by the check below.
            try:
                self.repo.cloud.pull(
                    NamedCache.make("local", self.checksum, str(self)),
                    jobs=jobs,
                    remote=remote,
                    show_checksums=False,
                )
            except DvcException:
                logger.debug("failed to pull cache for '{}'".format(self))

        if self.cache.changed_cache_file(self.checksum):
            question = (
                "Missing cache for directory '{}'. "
                "Cache for files inside will be lost. "
                "Would you like to continue? Use '-f' to force."
            )
            if force or prompt.confirm(question.format(self.path_info)):
                return used
            raise CollectCacheError(
                "unable to fully collect used cache"
                " without cache for directory '{}'".format(self)
            )

        for item in self.dir_cache:
            item_checksum = item[self.remote.PARAM_CHECKSUM]
            item_path = self.path_info / item[self.remote.PARAM_RELPATH]
            used.add(self.scheme, item_checksum, str(item_path))

        return used
Example #3
0
    def remove_with_prompt(self, force=False):
        """Remove this object if it exists, asking for confirmation first.

        The prompt is skipped when `force` is truthy.

        Raises:
            StageFileAlreadyExistsError: the user declined to overwrite.
        """
        if self.exists():
            question = (
                "'{}' already exists. Do you wish to run the command and "
                "overwrite it?"
            ).format(relpath(self.path))

            confirmed = force or prompt.confirm(question)
            if not confirmed:
                raise StageFileAlreadyExistsError(self.path)

            self.remove()
Example #4
0
    def run(self):
        """Garbage-collect experiments after warning and asking the user.

        Returns:
            0 when the repo has no experiments or the collection ran,
            1 when the user declined the confirmation prompt.
        """
        from dvc.repo.gc import _raise_error_if_all_disabled

        if not self.repo.experiments:
            return 0

        _raise_error_if_all_disabled(
            all_branches=self.args.all_branches,
            all_tags=self.args.all_tags,
            all_commits=self.args.all_commits,
            workspace=self.args.workspace,
        )

        msg = "This will remove all experiments except those derived from "

        msg += "the workspace"
        if self.args.all_commits:
            msg += " and all git commits"
        elif self.args.all_branches and self.args.all_tags:
            msg += " and all git branches and tags"
        elif self.args.all_branches:
            msg += " and all git branches"
        elif self.args.all_tags:
            msg += " and all git tags"
        msg += " of the current repo."
        # Exactly one of the two statements applies; previously both were
        # guarded by the same condition, producing a contradictory warning.
        if self.args.queued:
            msg += " Run queued experiments will be preserved."
        else:
            msg += " Run queued experiments will be removed."

        logger.warning(msg)

        msg = "Are you sure you want to proceed?"
        if not self.args.force and not prompt.confirm(msg):
            return 1

        removed = self.repo.experiments.gc(
            all_branches=self.args.all_branches,
            all_tags=self.args.all_tags,
            all_commits=self.args.all_commits,
            workspace=self.args.workspace,
            queued=self.args.queued,
        )

        if removed:
            logger.info(
                f"Removed {removed} experiments. To remove unused cache files "
                "use 'dvc gc'."
            )
        else:
            logger.info("No experiments to remove.")
        return 0
Example #5
0
File: base.py Project: pyanezs/dvc
    def collect_used_dir_cache(
        self, remote=None, force=False, jobs=None, filter_info=None
    ):
        """Build a NamedCache with one entry per file of this directory.

        The directory cache is fetched first (failures are only logged at
        debug level). If the directory object then fails the check, the user
        is asked whether to continue without it, unless `force` is set; in
        that case an empty NamedCache is returned. When `filter_info` is
        given, only the entry equal to it or located below it is collected.

        Example:

            After running:

            $ echo "foo" > directory/foo
            $ echo "bar" > directory/bar
            $ dvc add directory

            the result is equivalent to:

            nc = NamedCache()
            nc.add(self.scheme, 'c157a79031e1', 'directory/foo')
            nc.add(self.scheme, 'd3b07384d113', 'directory/bar')

        Raises:
            CollectCacheError: the directory cache is missing and the user
                declined to continue.
        """
        used = NamedCache()

        try:
            self.get_dir_cache(jobs=jobs, remote=remote)
        except DvcException:
            logger.debug(f"failed to pull cache for '{self}'")

        try:
            objects.check(self.odb, self.odb.get(self.hash_info))
        except (FileNotFoundError, objects.ObjectFormatError):
            question = (
                "Missing cache for directory '{}'. "
                "Cache for files inside will be lost. "
                "Would you like to continue? Use '-f' to force."
            )
            if force or prompt.confirm(question.format(self.path_info)):
                return used
            raise CollectCacheError(
                "unable to fully collect used cache"
                " without cache for directory '{}'".format(self)
            )

        base = str(self.path_info)
        if filter_info:
            wanted = str(filter_info)
        else:
            wanted = None

        for key, hash_info in self.dir_cache.items():
            full_path = os.path.join(base, *key)
            # Skip entries that are neither the filter path nor below it.
            if (
                wanted
                and full_path != wanted
                and not full_path.startswith(wanted + os.sep)
            ):
                continue
            used.add(self.scheme, hash_info.value, full_path)

        return used
Example #6
0
    def run(self):
        """Garbage-collect the cache after warning and asking the user.

        Returns:
            0 after the collection ran, 1 when the user declined the prompt.
        """
        from dvc.repo.gc import _raise_error_if_all_disabled

        _raise_error_if_all_disabled(
            all_branches=self.args.all_branches,
            all_tags=self.args.all_tags,
            all_commits=self.args.all_commits,
            workspace=self.args.workspace,
        )

        # Describe which revisions keep their cache; first match wins.
        if self.args.all_commits:
            scope = " and all git commits"
        elif self.args.all_branches and self.args.all_tags:
            scope = " and all git branches and tags"
        elif self.args.all_branches:
            scope = " and all git branches"
        elif self.args.all_tags:
            scope = " and all git tags"
        elif self.args.all_experiments:
            scope = " and all experiments"
        else:
            scope = ""

        pieces = [
            "This will remove all cache except items used in ",
            "the workspace",
            scope,
        ]

        if self.args.repos:
            pieces.append(" of the current and the following repos:")
            pieces.extend(
                "\n  - %s" % os.path.abspath(repo_path)
                for repo_path in self.args.repos
            )
        else:
            pieces.append(" of the current repo.")

        logger.warning("".join(pieces))

        question = "Are you sure you want to proceed?"
        if not (self.args.force or prompt.confirm(question)):
            return 1

        self.repo.gc(
            all_branches=self.args.all_branches,
            all_tags=self.args.all_tags,
            all_commits=self.args.all_commits,
            all_experiments=self.args.all_experiments,
            cloud=self.args.cloud,
            remote=self.args.remote,
            force=self.args.force,
            jobs=self.args.jobs,
            repos=self.args.repos,
            workspace=self.args.workspace,
        )
        return 0
Example #7
0
def _remove(path_info, tree, cache, force=False):
    if not tree.exists(path_info):
        return

    if not force and not _is_cached(cache, path_info, tree):
        msg = (
            "file '{}' is going to be removed."
            " Are you sure you want to proceed?".format(str(path_info))
        )

        if not prompt.confirm(msg):
            raise ConfirmRemoveError(str(path_info))

    tree.remove(path_info)
Example #8
0
    def safe_remove(self, path_info, force=False):
        """Remove `path_info`, asking for confirmation when it is not cached.

        Nothing happens if the path does not exist. Unless `force` is set or
        `self.already_cached(path_info)` is true, the user must confirm.

        Raises:
            DvcException: the user declined the removal.
        """
        if not self.exists(path_info):
            return

        needs_confirmation = not (force or self.already_cached(path_info))
        if needs_confirmation:
            question = (
                "file '{}' is going to be removed."
                " Are you sure you want to proceed?"
            ).format(str(path_info))
            if not prompt.confirm(question):
                reason = (
                    "unable to remove '{}' without a confirmation"
                    " from the user. Use '-f' to force."
                ).format(str(path_info))
                raise DvcException(reason)

        self.remove(path_info)
Example #9
0
    def _is_outs_only(self, target):
        if not self.args.purge:
            return True

        if self.args.force:
            return False

        msg = "Are you sure you want to remove {} with its outputs?".format(
            target)

        if prompt.confirm(msg):
            return False

        raise DvcException("Cannot purge without a confirmation from the user."
                           " Use `-f` to force.")
Example #10
0
def _remove(path_info, fs, in_cache, force=False):
    if not fs.exists(path_info):
        return

    if force:
        fs.remove(path_info)
        return

    if not in_cache:
        msg = (f"file/directory '{path_info}' is going to be removed. "
               "Are you sure you want to proceed?")

        if not prompt.confirm(msg):
            raise ConfirmRemoveError(str(path_info))

    fs.remove(path_info)
Example #11
0
    def reproduce(self, interactive=False, **kwargs):
        """Re-run this stage if it changed or `force=True` is passed.

        Returns:
            The stage itself after running, or None when it was skipped.

        Raises:
            DvcException: `interactive` is set and the user declined.
        """
        forced = kwargs.get("force", False)
        if not forced and not self.changed():
            logger.info("Stage '%s' didn't change, skipping", self.addressing)
            return None

        question = (
            "Going to reproduce {stage}. "
            "Are you sure you want to continue?"
        ).format(stage=self)

        if interactive:
            if not prompt.confirm(question):
                raise DvcException("reproduction aborted by the user")

        self.run(**kwargs)
        logger.debug(f"{self} was reproduced")
        return self
Example #12
0
    def reproduce(self, interactive=False, **kwargs):
        """Re-run this stage if it changed or `force=True` is passed.

        Returns:
            The stage itself after running, or None when it was skipped.

        Raises:
            DvcException: `interactive` is set and the user declined.
        """
        must_run = kwargs.get("force", False) or self.changed()
        if not must_run:
            return None

        question = (
            "Going to reproduce {stage}. "
            "Are you sure you want to continue?"
        ).format(stage=self)

        if interactive:
            if not prompt.confirm(question):
                raise DvcException("reproduction aborted by the user")

        self.run(**kwargs)
        logger.debug("{stage} was reproduced".format(stage=self))
        return self
Example #13
0
    def check_can_commit(self, force):
        """Save the stage if it changed, asking for confirmation first.

        Nothing is saved when neither the dependencies, the outputs nor the
        md5 changed. Otherwise the user is asked to confirm, unless `force`
        is truthy, and the stage is saved.

        Raises:
            StageCommitError: the user declined to commit the changes.
        """
        changed_deps = self._changed_entries(self.deps)
        changed_outs = self._changed_entries(self.outs)

        if changed_deps or changed_outs or self.changed_md5():
            msg = (
                "dependencies {}".format(changed_deps) if changed_deps else ""
            )
            msg += " and " if (changed_deps and changed_outs) else ""
            msg += "outputs {}".format(changed_outs) if changed_outs else ""
            # Only the md5 differs when no dependency or output changed.
            msg += "md5" if not (changed_deps or changed_outs) else ""
            msg += " of '{}' changed. ".format(self.relpath)
            msg += "Are you sure you want to commit it?"
            if not force and not prompt.confirm(msg):
                raise StageCommitError(
                    "unable to commit changed '{}'. Use `-f|--force` to "
                    "force.".format(self.relpath)
                )
            self.save()
Example #14
0
    def check_can_commit(self, force):
        """Save the stage if it changed, asking for confirmation first.

        Nothing is saved when neither the dependencies, the outputs nor the
        stage itself changed. Otherwise the user is asked to confirm, unless
        `force` is truthy, and the stage is saved.

        Raises:
            StageCommitError: the user declined to commit the changes.
        """
        dirty_deps = self._changed_entries(self.deps)
        dirty_outs = self._changed_entries(self.outs)

        if not (dirty_deps or dirty_outs or self.stage_changed()):
            return

        parts = []
        if dirty_deps:
            parts.append("dependencies {}".format(dirty_deps))
        if dirty_deps and dirty_outs:
            parts.append(" and ")
        if dirty_outs:
            parts.append("outputs {}".format(dirty_outs))
        if not (dirty_deps or dirty_outs):
            # Only the stage definition itself differs.
            parts.append("md5")
        parts.append(" of {} changed. ".format(self))
        parts.append("Are you sure you want to commit it?")
        question = "".join(parts)

        if not (force or prompt.confirm(question)):
            raise StageCommitError(
                "unable to commit changed {}. Use `-f|--force` to "
                "force.".format(self)
            )
        self.save()
Example #15
0
    def run(self):
        """Destroy the repo after confirmation (skipped with `--force`).

        Returns:
            0 on success, 1 if anything failed, including a declined prompt.
        """
        try:
            if not self.args.force:
                statement = (
                    "This will destroy all information about your pipelines,"
                    " all data files, as well as cache in .dvc/cache."
                    "\n"
                    "Are you sure you want to continue?"
                )
                if not prompt.confirm(statement):
                    raise DvcException(
                        "cannot destroy without a confirmation from the user."
                        " Use `-f` to force."
                    )

            self.repo.destroy()
        except Exception:
            logger.exception("failed to destroy DVC")
            return 1
        else:
            return 0
Example #16
0
    def run_cmd(self):
        """Destroy the project after confirmation (skipped with `--force`).

        Returns:
            0 on success, 1 if anything failed, including a declined prompt.
        """
        try:
            if not self.args.force:
                statement = (
                    "This will destroy all information about your pipelines,"
                    " all data files, as well as cache in .dvc/cache."
                    "\n"
                    "Are you sure you want to continue?"
                )
                if not prompt.confirm(statement):
                    raise DvcException(
                        "cannot destroy without a confirmation from the user."
                        " Use '-f' to force."
                    )

            self.project.destroy()
        except Exception:
            logger.error("failed to destroy DVC")
            return 1
        else:
            return 0
Example #17
0
def _remove(path_info, fs, cache, force=False):
    if not fs.exists(path_info):
        return

    if force:
        fs.remove(path_info)
        return

    current = stage(cache, path_info, fs, fs.PARAM_CHECKSUM).hash_info
    try:
        obj = load(cache, current)
        check(cache, obj)
    except (FileNotFoundError, ObjectFormatError):
        msg = (f"file/directory '{path_info}' is going to be removed. "
               "Are you sure you want to proceed?")

        if not prompt.confirm(msg):
            raise ConfirmRemoveError(str(path_info))

    fs.remove(path_info)
Example #18
0
    def reproduce(
        self, force=False, dry=False, interactive=False, no_commit=False
    ):
        """Re-run this stage if it changed or `force` is set.

        Returns:
            The stage itself after running, or None when it was skipped.

        Raises:
            DvcException: `interactive` is set and the user declined.
        """
        if not (self.changed() or force):
            return None

        question = (
            "Going to reproduce '{stage}'. "
            "Are you sure you want to continue?"
        ).format(stage=self.relpath)

        if interactive:
            if not prompt.confirm(question):
                raise DvcException("reproduction aborted by the user")

        logger.info("Reproducing '{stage}'".format(stage=self.relpath))
        self.run(dry=dry, no_commit=no_commit, force=force)
        logger.debug("'{stage}' was reproduced".format(stage=self.relpath))

        return self
Example #19
0
    def reproduce(self, force=False, dry=False, interactive=False):
        """Re-run this stage if it changed or `force` is set.

        In interactive mode the user is asked for confirmation before any
        output is removed, so declining the prompt leaves the workspace
        untouched.

        Returns:
            The stage itself after running, or None when it was skipped.

        Raises:
            DvcException: `interactive` is set and the user declined.
        """
        if not self.changed() and not force:
            return None

        msg = (
            "Going to reproduce '{stage}'. "
            "Are you sure you want to continue?".format(stage=self.relpath)
        )

        # Confirm first: removing outputs is destructive and must not happen
        # when the user aborts the reproduction.
        if interactive and not prompt.confirm(msg):
            raise DvcException("reproduction aborted by the user")

        if (self.cmd or self.is_import) and not self.locked and not dry:
            # Removing outputs only if we actually have command to reproduce
            self.remove_outs(ignore_remove=False)

        logger.info("Reproducing '{stage}'".format(stage=self.relpath))

        self.run(dry=dry)

        logger.debug("'{stage}' was reproduced".format(stage=self.relpath))

        return self
Example #20
0
    def _collect_dir_cache(self,
                           out,
                           branch=None,
                           remote=None,
                           force=False,
                           jobs=None):
        info = out.dumpd()
        ret = [info]
        r = out.remote
        md5 = info[r.PARAM_CHECKSUM]

        if self.cache.local.changed_cache_file(md5):
            try:
                self.cloud.pull(ret,
                                jobs=jobs,
                                remote=remote,
                                show_checksums=False)
            except DvcException as exc:
                msg = "Failed to pull cache for '{}': {}"
                logger.debug(msg.format(out, exc))

        if self.cache.local.changed_cache_file(md5):
            msg = ("Missing cache for directory '{}'. "
                   "Cache for files inside will be lost. "
                   "Would you like to continue? Use '-f' to force. ")
            if not force and not prompt.confirm(msg):
                raise DvcException(
                    "unable to fully collect used cache"
                    " without cache for directory '{}'".format(out))
            else:
                return ret

        for i in self.cache.local.load_dir_cache(md5):
            i["branch"] = branch
            i[r.PARAM_PATH] = os.path.join(info[r.PARAM_PATH],
                                           i[r.PARAM_RELPATH])
            ret.append(i)

        return ret
Example #21
0
    def create(
        repo=None,
        cmd=None,
        deps=None,
        outs=None,
        outs_no_cache=None,
        metrics=None,
        metrics_no_cache=None,
        fname=None,
        cwd=None,
        wdir=None,
        locked=False,
        add=False,
        overwrite=True,
        ignore_build_cache=False,
        remove_outs=False,
        validate_state=True,
        outs_persist=None,
        outs_persist_no_cache=None,
        erepo=None,
    ):
        """Build a Stage from the given command, dependencies and outputs.

        Returns:
            The new stage, or None when `validate_state` is set, the stage
            file already exists and the stage is cached.

        Raises:
            StageFileBadNameError: the deprecated `cwd` is used together with
                an `fname` that contains subdirectories.
            StageFileAlreadyExistsError: the stage file exists, `overwrite`
                is falsy and the user declined to overwrite it.
        """
        outs = [] if outs is None else outs
        deps = [] if deps is None else deps
        outs_no_cache = [] if outs_no_cache is None else outs_no_cache
        metrics = [] if metrics is None else metrics
        metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache
        outs_persist = [] if outs_persist is None else outs_persist
        outs_persist_no_cache = (
            [] if outs_persist_no_cache is None else outs_persist_no_cache
        )

        # Backward compatibility for `cwd` option
        if wdir is None and cwd is not None:
            fname_has_subdir = (
                fname is not None and os.path.basename(fname) != fname
            )
            if fname_has_subdir:
                raise StageFileBadNameError(
                    "stage file name '{fname}' may not contain subdirectories"
                    " if '-c|--cwd' (deprecated) is specified. Use '-w|--wdir'"
                    " along with '-f' to specify stage file path and working"
                    " directory.".format(fname=fname)
                )
            wdir = cwd
        elif wdir is None:
            wdir = os.curdir

        stage = Stage(repo=repo, wdir=wdir, cmd=cmd, locked=locked)

        Stage._fill_stage_outputs(
            stage,
            outs,
            outs_no_cache,
            metrics,
            metrics_no_cache,
            outs_persist,
            outs_persist_no_cache,
        )
        stage.deps = dependency.loads_from(stage, deps, erepo=erepo)

        stage._check_circular_dependency()
        stage._check_duplicated_arguments()

        if not fname:
            fname = Stage._stage_fname(stage.outs, add=add)
        stage._check_dvc_filename(fname)

        wdir = os.path.abspath(wdir)

        # With the deprecated `cwd`, the stage file lives in the working dir.
        path = (
            os.path.join(wdir, fname)
            if cwd is not None
            else os.path.abspath(fname)
        )

        Stage._check_stage_path(repo, wdir)
        Stage._check_stage_path(repo, os.path.dirname(path))

        stage.wdir = wdir
        stage.path = path

        # NOTE: remove outs before we check build cache
        if not remove_outs:
            stage.unprotect_outs()
        else:
            logger.warning(
                "--remove-outs is deprecated."
                " It is now the default behavior,"
                " so there's no need to use this option anymore."
            )
            stage.remove_outs(ignore_remove=False)
            logger.warning("Build cache is ignored when using --remove-outs.")
            ignore_build_cache = True

        if os.path.exists(path) and any(out.persist for out in stage.outs):
            logger.warning("Build cache is ignored when persisting outputs.")
            ignore_build_cache = True

        if validate_state and os.path.exists(path):
            if not ignore_build_cache and stage.is_cached:
                logger.info("Stage is cached, skipping.")
                return None

            question = (
                "'{}' already exists. Do you wish to run the command and "
                "overwrite it?"
            ).format(stage.relpath)

            if not (overwrite or prompt.confirm(question)):
                raise StageFileAlreadyExistsError(stage.relpath)

            os.unlink(path)

        return stage
Example #22
0
 def test_eof(self, mock_input, mock_isatty):
     """confirm() must consult both patched callables and return falsy.

     NOTE(review): presumably the patched input simulates EOF, as the test
     name suggests; the patch decorators are not visible here.
     """
     answer = confirm("message")
     for patched in (mock_isatty, mock_input):
         patched.assert_called()
     self.assertFalse(answer)
Example #23
0
    def create(
        project=None,
        cmd=None,
        deps=None,
        outs=None,
        outs_no_cache=None,
        metrics_no_cache=None,
        fname=None,
        cwd=os.curdir,
        locked=False,
        add=False,
        overwrite=True,
        ignore_build_cache=False,
        remove_outs=False,
    ):
        """Build a Stage from the given command, dependencies and outputs.

        The list parameters default to None and are treated as empty lists,
        so no list object is shared between calls.

        Returns:
            The new stage, or None when the stage file already exists and
            the stage is cached.

        Raises:
            StageFileBadNameError: `fname` contains subdirectories.
            StageFileAlreadyExistsError: the stage file exists, `overwrite`
                is falsy and the user declined to overwrite it.
        """
        deps = [] if deps is None else deps
        outs = [] if outs is None else outs
        outs_no_cache = [] if outs_no_cache is None else outs_no_cache
        metrics_no_cache = [] if metrics_no_cache is None else metrics_no_cache

        stage = Stage(project=project, cwd=cwd, cmd=cmd, locked=locked)

        stage.outs = output.loads_from(stage, outs, use_cache=True)
        stage.outs += output.loads_from(stage, outs_no_cache, use_cache=False)
        stage.outs += output.loads_from(stage,
                                        metrics_no_cache,
                                        use_cache=False,
                                        metric=True)
        stage.deps = dependency.loads_from(stage, deps)

        stage._check_circular_dependency()
        stage._check_duplicated_arguments()

        if fname is not None and os.path.basename(fname) != fname:
            raise StageFileBadNameError(
                "stage file name '{fname}' should not contain subdirectories."
                " Use '-c|--cwd' to change location of the stage file.".format(
                    fname=fname))

        fname, cwd = Stage._stage_fname_cwd(fname, cwd, stage.outs, add=add)

        Stage._check_inside_project(project, cwd)

        cwd = os.path.abspath(cwd)
        path = os.path.join(cwd, fname)

        stage.cwd = cwd
        stage.path = path

        # NOTE: remove outs before we check build cache
        if remove_outs:
            stage.remove_outs(ignore_remove=False)
            logger.warning("Build cache is ignored when using --remove-outs.")
            ignore_build_cache = True
        else:
            stage.unprotect_outs()

        if os.path.exists(path):
            if not ignore_build_cache and stage.is_cached:
                logger.info("Stage is cached, skipping.")
                return None

            msg = ("'{}' already exists. Do you wish to run the command and "
                   "overwrite it?".format(stage.relpath))

            if not overwrite and not prompt.confirm(msg):
                raise StageFileAlreadyExistsError(stage.relpath)

            os.unlink(path)

        return stage
Example #24
0
def prompt_to_commit(stage, changes, force=False):
    """Ask the user to confirm committing a changed stage.

    The prompt is skipped when `force` is truthy.

    Raises:
        StageCommitError: the user declined.
    """
    if force:
        return
    if prompt.confirm(_prepare_message(stage, changes)):
        return
    raise StageCommitError(
        "unable to commit changed {}. Use `-f|--force` to "
        "force.".format(stage)
    )
Example #25
0
    def create(repo, **kwargs):
        """Build a Stage for `repo` from keyword options.

        Options read directly here: `wdir`, `cwd` (deprecated), `fname`,
        `add`, `cmd`, `locked`, `always_changed`, `deps`, `erepo`,
        `ignore_build_cache`, `remove_outs` and `overwrite`. All of `kwargs`
        is also forwarded to `Stage._fill_stage_outputs`.

        Returns:
            The new stage, or None when the stage file already exists and
            the stage is cached (and is neither a callback nor marked as
            always changed).

        Raises:
            StageFileBadNameError: the deprecated `cwd` is used together with
                an `fname` that contains subdirectories.
            StageFileAlreadyExistsError: the stage file exists, `overwrite`
                is falsy and the user declined to overwrite it.
        """

        wdir = kwargs.get("wdir", None)
        cwd = kwargs.get("cwd", None)
        fname = kwargs.get("fname", None)
        add = kwargs.get("add", False)

        # Backward compatibility for `cwd` option
        if wdir is None and cwd is not None:
            if fname is not None and os.path.basename(fname) != fname:
                raise StageFileBadNameError(
                    "DVC-file name '{fname}' may not contain subdirectories"
                    " if `-c|--cwd` (deprecated) is specified. Use `-w|--wdir`"
                    " along with `-f` to specify DVC-file path with working"
                    " directory.".format(fname=fname))
            wdir = cwd
        elif wdir is None:
            wdir = os.curdir

        stage = Stage(
            repo=repo,
            wdir=wdir,
            cmd=kwargs.get("cmd", None),
            locked=kwargs.get("locked", False),
            always_changed=kwargs.get("always_changed", False),
        )

        Stage._fill_stage_outputs(stage, **kwargs)
        stage.deps = dependency.loads_from(stage,
                                           kwargs.get("deps", []),
                                           erepo=kwargs.get("erepo", None))

        stage._check_circular_dependency()
        stage._check_duplicated_arguments()

        # Derive the stage file name from the outputs when none was given.
        if not fname:
            fname = Stage._stage_fname(stage.outs, add)
        stage._check_dvc_filename(fname)

        # Autodetecting wdir for add, we need to create outs first to do that,
        # so we start with wdir = . and remap out paths later.
        if add and kwargs.get("wdir") is None and cwd is None:
            wdir = os.path.dirname(fname)

            # Re-express in-repo paths relative to the detected wdir.
            for out in chain(stage.outs, stage.deps):
                if out.is_in_repo:
                    out.def_path = relpath(out.path_info, wdir)

        wdir = os.path.abspath(wdir)

        # With the deprecated `cwd`, the stage file lives in the working dir;
        # otherwise `fname` is resolved against the current directory.
        if cwd is not None:
            path = os.path.join(wdir, fname)
        else:
            path = os.path.abspath(fname)

        Stage._check_stage_path(repo, wdir)
        Stage._check_stage_path(repo, os.path.dirname(path))

        stage.wdir = wdir
        stage.path = path

        ignore_build_cache = kwargs.get("ignore_build_cache", False)

        # NOTE: remove outs before we check build cache
        if kwargs.get("remove_outs", False):
            logger.warning("--remove-outs is deprecated."
                           " It is now the default behavior,"
                           " so there's no need to use this option anymore.")
            stage.remove_outs(ignore_remove=False)
            logger.warning("Build cache is ignored when using --remove-outs.")
            ignore_build_cache = True

        if os.path.exists(path) and any(out.persist for out in stage.outs):
            logger.warning("Build cache is ignored when persisting outputs.")
            ignore_build_cache = True

        if os.path.exists(path):
            # Skip only when the build cache may be used and the stage is
            # neither a callback nor flagged as always changed.
            if (not ignore_build_cache and stage.is_cached
                    and not stage.is_callback and not stage.always_changed):
                logger.info("Stage is cached, skipping.")
                return None

            msg = ("'{}' already exists. Do you wish to run the command and "
                   "overwrite it?".format(stage.relpath))

            # `overwrite` defaults to True, so the prompt only appears when
            # the caller explicitly disables overwriting.
            if not kwargs.get("overwrite", True) and not prompt.confirm(msg):
                raise StageFileAlreadyExistsError(stage.relpath)

            os.unlink(path)

        return stage