Esempio n. 1
0
    def test_progress_awareness(self, mocker, capsys, caplog):
        """Check that log output cooperates with an active progress target.

        Formatting a DEBUG record while the "dvc" logger is at INFO must not
        put a newline on stdout; logging a visible INFO message must.
        """
        from dvc.progress import progress

        # ``mocker.patch`` applies the patch immediately and reverts it at
        # teardown; pytest-mock does not support using it as a context
        # manager, so it is called as a plain statement.
        mocker.patch("sys.stdout.isatty", return_value=True)

        progress.set_n_total(100)
        progress.update_target("progress", 1, 10)

        # logging an invisible message should not break
        # the progress bar output
        with caplog.at_level(logging.INFO, logger="dvc"):
            debug_record = logging.LogRecord(
                name="dvc",
                level=logging.DEBUG,
                pathname=__name__,
                lineno=1,
                msg="debug",
                args=(),
                exc_info=None,
            )

            formatter.format(debug_record)
            captured = capsys.readouterr()
            assert "\n" not in captured.out

        # a newline shows up only when the message is actually visible
        with caplog.at_level(logging.INFO, logger="dvc"):
            logger.info("some info")
            captured = capsys.readouterr()
            assert "\n" in captured.out
Esempio n. 2
0
    def test_progress_awareness(self, mocker, capsys, caplog):
        """Check that logging leaves stdout empty while a Tqdm bar is open.

        Both formatting an invisible DEBUG record and logging a visible INFO
        message are expected to produce no captured stdout output.
        """
        from dvc.progress import Tqdm

        # ``mocker.patch`` applies the patch immediately and reverts it at
        # teardown; pytest-mock does not support using it as a context
        # manager, so it is called as a plain statement.
        mocker.patch("sys.stdout.isatty", return_value=True)

        with Tqdm(total=100, desc="progress") as pbar:
            pbar.update()

            # logging an invisible message should not break
            # the progress bar output
            with caplog.at_level(logging.INFO, logger="dvc"):
                debug_record = logging.LogRecord(
                    name="dvc",
                    level=logging.DEBUG,
                    pathname=__name__,
                    lineno=1,
                    msg="debug",
                    args=(),
                    exc_info=None,
                )

                formatter.format(debug_record)
                captured = capsys.readouterr()
                assert captured.out == ""

            # same expectation when the message is actually visible
            with caplog.at_level(logging.INFO, logger="dvc"):
                logger.info("some info")
                captured = capsys.readouterr()
                assert captured.out == ""
Esempio n. 3
0
    def save(self):
        """Validate the output path and record its info.

        Raises ``DoesNotExistError`` when the path is missing and
        ``IsNotFileOrDirError`` when it is neither a file nor a directory.
        An empty file or directory only produces a warning.

        Outputs that do not use the cache get their info saved and their
        metric verified, then return early.  Cached outputs that did not
        change are skipped.  Otherwise a local output already tracked by the
        SCM raises ``OutputAlreadyTrackedError``; if not tracked it is added
        to the SCM ignore list before the info is saved.
        """
        if not os.path.exists(self.path):
            raise self.DoesNotExistError(self.rel_path)

        is_file = os.path.isfile(self.path)
        if not (is_file or os.path.isdir(self.path)):
            raise self.IsNotFileOrDirError(self.rel_path)

        # Past the check above the path is either a file or a directory.
        if is_file:
            is_empty = os.path.getsize(self.path) == 0
        else:
            is_empty = len(os.listdir(self.path)) == 0

        if is_empty:
            logger.warning(
                "file/directory '{}' is empty.".format(self.rel_path)
            )

        if not self.use_cache:
            self.info = self.remote.save_info(self.path_info)
            self.verify_metric()
            if not self.IS_DEPENDENCY:
                logger.info(
                    "Output '{}' doesn't use cache. Skipping saving.".format(
                        self.rel_path
                    )
                )
            return

        assert not self.IS_DEPENDENCY

        if not self.changed():
            logger.info(
                "Output '{}' didn't change. Skipping saving.".format(
                    self.rel_path
                )
            )
            return

        if self.is_local:
            scm = self.repo.scm
            if scm.is_tracked(self.path):
                raise OutputAlreadyTrackedError(self.rel_path)

            if self.use_cache:
                scm.ignore(self.path)

        self.info = self.remote.save_info(self.path_info)
Esempio n. 4
0
File: remote.py Progetto: rpip/dvc
    def run(self):
        """Add a remote section to the config and optionally make it default.

        Returns 1 (after logging an error) when a section for the remote
        name already exists and ``--force`` was not given; otherwise returns
        the result of the last ``_set`` call.
        """
        from dvc.remote import _get, RemoteLOCAL

        name = self.args.name

        remote_cls = _get({Config.SECTION_REMOTE_URL: self.args.url})
        if remote_cls == RemoteLOCAL:
            # URLs of local remotes are resolved against the config file
            self.args.url = self.resolve_path(
                self.args.url, self.configobj.filename
            )

        section = Config.SECTION_REMOTE_FMT.format(name)
        already_defined = section in self.configobj.keys()
        if already_defined and not self.args.force:
            logger.error(
                "Remote with name {} already exists. "
                "Use -f (--force) to overwrite remote "
                "with new value".format(name)
            )
            return 1

        ret = self._set(section, Config.SECTION_REMOTE_URL, self.args.url)
        if ret == 0 and self.args.default:
            logger.info("Setting '{}' as a default remote.".format(name))
            ret = self._set(
                Config.SECTION_CORE, Config.SECTION_CORE_REMOTE, name
            )

        return ret
Esempio n. 5
0
    def run(self):
        """Reproduce every requested target and return an exit code.

        Optionally changes into ``args.cwd`` first.  The original working
        directory is always restored, even when an unexpected exception
        escapes the reproduction loop.

        Returns:
            0 on success, 1 as soon as a target fails with ``DvcException``
            (remaining targets are skipped).
        """
        recursive = not self.args.single_item
        saved_dir = os.path.realpath(os.curdir)
        if self.args.cwd:
            os.chdir(self.args.cwd)

        ret = 0
        try:
            # Dirty hack so the for loop below can at least enter once
            if self.args.all_pipelines:
                self.args.targets = [None]

            for target in self.args.targets:
                try:
                    stages = self.project.reproduce(
                        target,
                        recursive=recursive,
                        force=self.args.force,
                        dry=self.args.dry,
                        interactive=self.args.interactive,
                        pipeline=self.args.pipeline,
                        all_pipelines=self.args.all_pipelines,
                        ignore_build_cache=self.args.ignore_build_cache)

                    if len(stages) == 0:
                        logger.info(CmdDataStatus.UP_TO_DATE_MSG)

                    if self.args.metrics:
                        self.project.metrics_show()
                except DvcException:
                    logger.error()
                    ret = 1
                    break
        finally:
            # Restore the caller's working directory on every exit path
            os.chdir(saved_dir)

        return ret
Esempio n. 6
0
File: local.py Progetto: ml-lab/dvc
    def save(self):
        """Save this local output, going through the cache when enabled.

        Without cache the parent ``save`` is called, the metric is verified
        and the method returns.  With cache the path must exist
        (``DoesNotExistError``) and be a file or directory
        (``IsNotFileOrDirError``); an empty one only produces a warning.
        Unchanged outputs are skipped.  A local output already tracked by
        the SCM raises ``OutputAlreadyTrackedError``; if not tracked it is
        added to the SCM ignore list before being saved to the local cache.
        """
        if not self.use_cache:
            super(OutputLOCAL, self).save()
            self._verify_metric()
            logger.info(
                "Output '{}' doesn't use cache. Skipping saving.".format(
                    self.rel_path
                )
            )
            return

        if not os.path.exists(self.path):
            raise self.DoesNotExistError(self.rel_path)

        is_file = os.path.isfile(self.path)
        if not (is_file or os.path.isdir(self.path)):
            raise self.IsNotFileOrDirError(self.rel_path)

        # Past the check above the path is either a file or a directory.
        if is_file:
            is_empty = os.path.getsize(self.path) == 0
        else:
            is_empty = len(os.listdir(self.path)) == 0

        if is_empty:
            logger.warning(
                "file/directory '{}' is empty.".format(self.rel_path)
            )

        if not self.changed():
            logger.info(
                "Output '{}' didn't change. Skipping saving.".format(
                    self.rel_path
                )
            )
            return

        if self.is_local:
            scm = self.project.scm
            if scm.is_tracked(self.path):
                raise OutputAlreadyTrackedError(self.rel_path)

            if self.use_cache:
                scm.ignore(self.path)

        self.info = self.project.cache.local.save(self.path_info)
Esempio n. 7
0
    def _save_dir(self, path_info):
        """Move a directory's entries into the cache and link them back.

        For every entry reported by ``self.state.update_info`` the file is
        moved into the cache when ``changed_cache`` says so, otherwise the
        working copy is removed; either way the cache file is then linked
        to the entry's path.  Progress is reported only for directories
        larger than ``LARGE_DIR_SIZE``.

        Returns:
            A dict mapping ``self.PARAM_CHECKSUM`` to the directory checksum.
        """
        path = path_info["path"]
        md5, dir_info = self.state.update_info(path)
        dir_relpath = os.path.relpath(path)
        total = len(dir_info)
        show_progress = total > LARGE_DIR_SIZE

        logger.info("Linking directory '{}'.".format(dir_relpath))

        for index, entry in enumerate(dir_info):
            entry_path = os.path.join(path, entry[self.PARAM_RELPATH])
            entry_checksum = entry[self.PARAM_CHECKSUM]
            cache_path = self.get(entry_checksum)

            if not self.changed_cache(entry_checksum):
                remove(entry_path)
            else:
                self._move(entry_path, cache_path)

            self.link(cache_path, entry_path)

            if show_progress:
                progress.update_target(dir_relpath, index, total)

        self.state.update_link(path)

        if show_progress:
            progress.finish_target(dir_relpath)

        return {self.PARAM_CHECKSUM: md5}
Esempio n. 8
0
def show_metrics(metrics, all_branches=False, all_tags=False):
    """Log every metric, grouped per branch.

    ``metrics`` maps a branch name to a mapping of file name -> metric.
    The branch name is logged as a header only when ``all_branches`` or
    ``all_tags`` is set.
    """
    with_header = all_branches or all_tags

    for branch, per_file in metrics.items():
        if with_header:
            logger.info("{}:".format(branch))

        for fname, metric in per_file.items():
            line = "\t{}: {}".format(fname, metric)
            logger.info(line)
Esempio n. 9
0
    def reproduce(
        self, force=False, dry=False, interactive=False, no_commit=False
    ):
        """Re-run this stage when it changed (or when forced).

        Returns:
            ``None`` when the stage is unchanged and ``force`` is false,
            otherwise the stage itself after it has been run.

        Raises:
            DvcException: if ``interactive`` is set and the user declines
                the confirmation prompt.
        """
        if not (self.changed() or force):
            return None

        has_command = self.cmd or self.is_import
        if has_command and not self.locked and not dry:
            # Removing outputs only if we actually have command to reproduce
            self.remove_outs(ignore_remove=False)

        question = (
            "Going to reproduce '{stage}'. "
            "Are you sure you want to continue?".format(stage=self.relpath)
        )

        if interactive and not prompt.confirm(question):
            raise DvcException("reproduction aborted by the user")

        logger.info("Reproducing '{stage}'".format(stage=self.relpath))
        self.run(dry=dry, no_commit=no_commit)
        logger.debug("'{stage}' was reproduced".format(stage=self.relpath))

        return self
Esempio n. 10
0
    def ignore(self, path):
        """Append the ignore entry for ``path`` to its gitignore file.

        Does nothing when an equal entry (compared after stripping
        whitespace) is already listed.  Otherwise the entry is appended,
        preceded by a newline when the file already had content, and the
        gitignore's relative path is queued in
        ``self.repo.files_to_git_add`` when a repo is attached.
        """
        entry, gitignore = self._get_gitignore(path)

        ignore_list = []
        if os.path.exists(gitignore):
            # Read through a context manager so the handle is closed
            # deterministically instead of being left to the GC.
            with open(gitignore, "r") as fobj:
                ignore_list = fobj.readlines()

            wanted = entry.strip()
            if any(line.strip() == wanted for line in ignore_list):
                return

        msg = "Adding '{}' to '{}'.".format(
            os.path.relpath(path), os.path.relpath(gitignore)
        )
        logger.info(msg)

        content = entry
        if ignore_list:
            content = "\n" + content

        with open(gitignore, "a") as fobj:
            fobj.write(content)

        if self.repo is not None:
            self.repo.files_to_git_add.append(os.path.relpath(gitignore))
Esempio n. 11
0
    def status(self, checksum_infos, remote, jobs=None, show_checksums=False):
        """Collect the local/remote cache status of ``checksum_infos``.

        The checksums are grouped via ``self._group``, looked up in the
        remote and in the local cache, and the grouped dict is then filled
        in by ``self._fill_statuses`` and checked by
        ``self._log_missing_caches``.  ``jobs`` is accepted but not used
        here.

        Returns:
            The grouped dict produced by ``self._group``, after it has been
            passed to ``self._fill_statuses``.
        """
        logger.info("Preparing to collect status from {}".format(remote.url))
        title = "Collecting information"

        def report(percent):
            # Single progress target advanced in fixed percentage steps
            progress.update_target(title, percent, 100)

        progress.set_n_total(1)
        report(0)
        report(10)

        ret = self._group(checksum_infos, show_checksums=show_checksums)
        md5s = list(ret.keys())
        report(30)

        remote_exists = list(remote.cache_exists(md5s))
        report(90)

        local_exists = self.cache_exists(md5s)
        progress.finish_target(title)

        self._fill_statuses(ret, local_exists, remote_exists)
        self._log_missing_caches(ret)

        return ret
Esempio n. 12
0
    def status(self, checksum_infos, remote, jobs=None, show_checksums=False):
        """Collect the local/remote cache status of ``checksum_infos``.

        The checksums are grouped via ``self._group`` and looked up in the
        remote and in the local cache.  Each entry then gets a ``"status"``
        taken from ``STATUS_MAP``, keyed by the pair
        ``(exists locally, exists remotely)``.  ``jobs`` is accepted but not
        used here.

        Returns:
            The grouped dict with a ``"status"`` key set on every entry.
        """
        logger.info("Preparing to collect status from {}".format(remote.url))
        title = "Collecting information"

        def report(percent):
            # Single progress target advanced in fixed percentage steps
            progress.update_target(title, percent, 100)

        progress.set_n_total(1)
        report(0)
        report(10)

        ret = self._group(checksum_infos, show_checksums=show_checksums)
        md5s = list(ret.keys())
        report(30)

        remote_exists = list(remote.cache_exists(md5s))
        report(90)

        local_exists = self.cache_exists(md5s)
        progress.finish_target(title)

        for md5, info in ret.items():
            presence = (md5 in local_exists, md5 in remote_exists)
            info["status"] = STATUS_MAP[presence]

        return ret
Esempio n. 13
0
    def _show(self, metrics):
        """Log every metric, grouped per branch.

        The branch name is logged as a header only when ``--all-branches``
        or ``--all-tags`` was requested on ``self.args``.
        """
        for branch, per_file in metrics.items():
            with_header = self.args.all_branches or self.args.all_tags
            if with_header:
                logger.info("{}:".format(branch))

            for fname, metric in per_file.items():
                line = "\t{}: {}".format(fname, metric)
                logger.info(line)
Esempio n. 14
0
    def do_run(self, target=None):
        """Query the project status for ``target`` and report it.

        Returns:
            1 when the status is non-empty in quiet mode or when obtaining
            it raised any exception, 0 otherwise.  A non-empty status in
            non-quiet mode is printed through ``self._show``; an empty one
            logs ``self.UP_TO_DATE_MSG``.
        """
        # Cloud status is displayed one level deeper
        indent = 1 if self.args.cloud else 0
        try:
            args = self.args
            st = self.project.status(
                target=target,
                jobs=args.jobs,
                cloud=args.cloud,
                show_checksums=args.show_checksums,
                remote=args.remote,
                all_branches=args.all_branches,
                all_tags=args.all_tags,
                with_deps=args.with_deps,
            )
            if not st:
                logger.info(self.UP_TO_DATE_MSG)
            elif args.quiet:
                return 1
            else:
                self._show(st, indent)

        except Exception:
            logger.error("failed to obtain data status")
            return 1
        return 0
Esempio n. 15
0
    def _remind_to_git_add(self):
        """Log a hint with the ``git add`` command for pending files.

        Nothing is logged when ``self._files_to_git_add`` is empty.
        """
        # Truthiness check instead of ``len(...) == 0``
        if not self._files_to_git_add:
            return

        msg = '\nTo track the changes with git run:\n\n'
        msg += '\tgit add ' + " ".join(self._files_to_git_add)

        logger.info(msg)
Esempio n. 16
0
File: pkg.py Progetto: rpip/dvc
 def create_module_dir(module_name):
     """Return the module directory path, wiping any existing copy.

     An existing directory under ``GitPackage.MODULES_DIR`` is removed
     (logged as an update); otherwise the package is logged as being added.
     The directory itself is not created here.
     """
     module_dir = os.path.join(GitPackage.MODULES_DIR, module_name)
     if not os.path.exists(module_dir):
         logger.info("Adding package {}".format(module_name))
         return module_dir

     logger.info("Updating package {}".format(module_name))
     shutil.rmtree(module_dir)
     return module_dir
Esempio n. 17
0
    def init(root_dir=os.curdir, no_scm=False, force=False):
        """
        Create a fresh `.dvc` directory inside the given project root.

        The config is initialised in that directory and added to the SCM;
        when the SCM defines an ignore file, that file is added as well.

        Args:
            root_dir: Path to project's root directory.
            no_scm: Skip the check that the directory is tracked by a
                supported SCM.
            force: Remove an already existing `.dvc` directory instead of
                failing.

        Returns:
            Project instance.

        Raises:
            InitError: If the directory is not tracked by a supported SCM
                (and `no_scm` is false), or if the `.dvc` directory already
                exists and `force` is false.
        """
        import shutil
        from dvc.scm import SCM, Base
        from dvc.config import Config

        root_dir = os.path.abspath(root_dir)
        dvc_dir = os.path.join(root_dir, Project.DVC_DIR)

        scm = SCM(root_dir)
        # Exact type comparison is kept on purpose: only the bare Base
        # class is treated as "no SCM" here.
        if not no_scm and type(scm) == Base:
            raise InitError(
                "{project} is not tracked by any supported scm tool"
                " (e.g. git). Use '--no-scm' if you don't want to use any scm.".format(
                    project=root_dir
                )
            )

        already_initialized = os.path.isdir(dvc_dir)
        if already_initialized and not force:
            raise InitError(
                "'{project}' exists. Use '-f' to force.".format(
                    project=os.path.relpath(dvc_dir)
                )
            )
        if already_initialized:
            shutil.rmtree(dvc_dir)

        os.mkdir(dvc_dir)

        config = Config.init(dvc_dir)
        project = Project(root_dir)

        scm.add([config.config_file])

        if scm.ignore_file:
            ignore_path = os.path.join(dvc_dir, scm.ignore_file)
            scm.add([ignore_path])
            logger.info("\nYou can now commit the changes to git.\n")

        project._welcome_message()

        return project
Esempio n. 18
0
File: remote.py Progetto: ml-lab/dvc
 def run(self):
     """Log the name and URL of every remote section in the config.

     Sections are recognised by matching ``Config.SECTION_REMOTE_REGEX``;
     a remote without a URL option is listed with an empty URL.
     Always returns 0.
     """
     for section in self.configobj.keys():
         match = re.match(Config.SECTION_REMOTE_REGEX, section)
         if not match:
             continue

         name = match.group('name')
         options = self.configobj[section]
         url = options.get(Config.SECTION_REMOTE_URL, '')
         logger.info('{}\t{}'.format(name, url))
     return 0
Esempio n. 19
0
    def _remind_to_git_add(self):
        """Log a hint with the ``git add`` command for pending files.

        Nothing is logged when ``self._files_to_git_add`` is empty.
        """
        pending = self._files_to_git_add
        if pending:
            hint = (
                '\n'
                'To track the changes with git run:\n'
                '\n'
                '\tgit add {files}'
            )
            logger.info(hint.format(files=' '.join(pending)))
Esempio n. 20
0
    def _remind_to_git_add(self):
        """Log a hint with the ``git add`` command for pending files.

        Nothing is logged when ``self.files_to_git_add`` is empty.
        """
        pending = self.files_to_git_add
        if pending:
            hint = (
                "\n"
                "To track the changes with git run:\n"
                "\n"
                "\tgit add {files}"
            )
            logger.info(hint.format(files=" ".join(pending)))
Esempio n. 21
0
    def show(self, config, section, opt):
        """Log the value of ``opt`` from ``section`` of ``config``.

        Raises:
            ConfigError: if the section, or the option inside it, is
                missing.
        """
        if section not in config.keys():
            raise ConfigError("section '{}' doesn't exist".format(section))

        options = config[section]
        if opt not in options.keys():
            missing = "option '{}.{}' doesn't exist".format(section, opt)
            raise ConfigError(missing)

        logger.info(options[opt])
Esempio n. 22
0
    def _show(self, status, indent=0):
        """Log a (possibly nested) status dict, one entry per line.

        Dict values are logged as a header followed by their own entries
        one indentation level deeper; any other value is normalised with
        ``self._normalize`` and logged in front of its key.
        """
        prefix = indent * self.STATUS_INDENT

        for key, value in status.items():
            if not isinstance(value, dict):
                line = "{}{}{}".format(prefix, self._normalize(value), key)
                logger.info(line)
                continue

            logger.info("{}{}".format(prefix, key))
            self._show(value, indent + 1)
Esempio n. 23
0
    def dump(self, fname=None):
        """Write the stage as YAML and queue the file for ``git add``.

        Args:
            fname: Destination file; ``self.path`` is used when it is
                falsy.  The name is validated by
                ``self._check_dvc_filename``.
        """
        if not fname:
            fname = self.path

        self._check_dvc_filename(fname)

        message = "Saving information to '{file}'.".format(
            file=os.path.relpath(fname)
        )
        logger.info(message)

        with open(fname, "w") as fd:
            yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

        pending = self.project.files_to_git_add
        pending.append(os.path.relpath(fname))
Esempio n. 24
0
    def dump(self, fname=None):
        """Write the stage as YAML and queue the file for ``git add``.

        Args:
            fname: Destination file; ``self.path`` is used when it is
                falsy.  The name is validated by
                ``self._check_dvc_filename``.
        """
        fname = fname if fname else self.path

        self._check_dvc_filename(fname)

        logger.info(
            "Saving information to '{}'.".format(os.path.relpath(fname))
        )

        with open(fname, 'w') as fd:
            yaml.safe_dump(self.dumpd(), fd, default_flow_style=False)

        pending = self.project._files_to_git_add
        pending.append(os.path.relpath(fname))
Esempio n. 25
0
    def dump(self):
        """Write the stage as YAML to ``self.path`` and track it in the SCM.

        The file name is validated by ``self._check_dvc_filename`` and the
        stage data is serialised before the file is opened for writing.
        """
        target = self.path

        self._check_dvc_filename(target)

        message = "Saving information to '{file}'.".format(
            file=os.path.relpath(target)
        )
        logger.info(message)
        data = self.dumpd()

        with open(target, "w") as fd:
            yaml.safe_dump(data, fd, default_flow_style=False)

        self.repo.scm.track_file(os.path.relpath(target))
Esempio n. 26
0
File: remote.py Progetto: ml-lab/dvc
    def run(self):
        """Store the remote's URL in the config and optionally make it default.

        Returns:
            The result of the last ``_set`` call; a non-zero result from
            storing the URL is returned immediately.
        """
        name = self.args.name
        section = Config.SECTION_REMOTE_FMT.format(name)

        ret = self._set(section, Config.SECTION_REMOTE_URL, self.args.url)
        if ret == 0 and self.args.default:
            logger.info("Setting '{}' as a default remote.".format(name))
            ret = self._set(
                Config.SECTION_CORE, Config.SECTION_CORE_REMOTE, name
            )

        return ret
Esempio n. 27
0
    def run(self, dry=False):
        """Announce and, unless ``dry``, execute this stage, then save it.

        Locked stages and data sources only get their outputs checked,
        imports download the first dependency into the first output, and
        any other stage runs its command.
        """
        # Phase 1: tell the user what is about to happen.
        if self.locked:
            msg = u"Verifying outputs in locked stage '{}'"
            logger.info(msg.format(self.relpath))
        elif self.is_import:
            msg = u"Importing '{}' -> '{}'"
            logger.info(msg.format(self.deps[0].path, self.outs[0].path))
        elif self.is_data_source:
            logger.info(
                u"Verifying data sources in '{}'".format(self.relpath)
            )
        else:
            logger.info(u'Running command:\n\t{}'.format(self.cmd))

        if dry:
            return

        # Phase 2: actually do it, using the same branch order as above.
        if self.locked:
            self.check_missing_outputs()
        elif self.is_import:
            self.deps[0].download(self.outs[0].path_info)
        elif self.is_data_source:
            self.check_missing_outputs()
        else:
            self._run()

        self.save()
Esempio n. 28
0
    def do_checkout(self, path_info, checksum, force=False):
        """Link the cached data for ``checksum`` into ``path_info``'s path.

        A non-directory cache entry replaces whatever is at the path
        (removed through ``self.safe_remove``) with a link.  For a
        directory cache entry the directory is created if needed, every
        changed entry is re-linked, and
        ``self._discard_working_directory_changes`` is then called for the
        directory.  Progress is reported only for directories larger than
        ``LARGE_DIR_SIZE``.
        """
        path = path_info["path"]
        cache = self.get(checksum)

        if not self.is_dir_cache(cache):
            if os.path.exists(path):
                self.safe_remove(path_info, force=force)

            self.link(cache, path)
            self.state.update_link(path)
            return

        # Create dir separately so that dir is created
        # even if there are no files in it
        if not os.path.exists(path):
            os.makedirs(path)

        dir_info = self.load_dir_cache(checksum)
        dir_relpath = os.path.relpath(path)
        total = len(dir_info)
        show_progress = total > LARGE_DIR_SIZE

        logger.info("Linking directory '{}'.".format(dir_relpath))

        for index, entry in enumerate(dir_info):
            entry_path = os.path.join(path, entry[self.PARAM_RELPATH])
            entry_checksum = entry[self.PARAM_CHECKSUM]
            entry_cache = self.get(entry_checksum)

            entry_info = {
                "scheme": path_info["scheme"],
                self.PARAM_PATH: entry_path,
            }
            expected = {self.PARAM_CHECKSUM: entry_checksum}

            if self.changed(entry_info, expected):
                if os.path.exists(entry_path):
                    self.safe_remove(entry_info, force=force)

                self.link(entry_cache, entry_path)

            if show_progress:
                progress.update_target(dir_relpath, index, total)

        self._discard_working_directory_changes(path, dir_info, force=force)

        self.state.update_link(path)

        if show_progress:
            progress.finish_target(dir_relpath)
Esempio n. 29
0
    def run(self, dry=False):
        """Announce and, unless ``dry``, execute this stage, then save it.

        Locked stages and data sources only get their outputs checked.
        Imports and command stages are checked out when
        ``self._already_cached()`` is true; otherwise the import is
        downloaded or the command is run.
        """
        # Phase 1: tell the user what is about to happen.
        if self.locked:
            logger.info("Verifying outputs in locked stage '{stage}'".format(
                stage=self.relpath))
        elif self.is_import:
            logger.info("Importing '{dep}' -> '{out}'".format(
                dep=self.deps[0].path, out=self.outs[0].path))
        elif self.is_data_source:
            logger.info(
                "Verifying data sources in '{}'".format(self.relpath))
        else:
            logger.info("Running command:\n\t{}".format(self.cmd))

        if dry:
            return

        # Phase 2: actually do it, using the same branch order as above.
        if self.locked:
            self.check_missing_outputs()
        elif self.is_import:
            if self._already_cached():
                self.outs[0].checkout()
            else:
                self.deps[0].download(self.outs[0].path_info)
        elif self.is_data_source:
            self.check_missing_outputs()
        elif self._already_cached():
            self.checkout()
        else:
            self._run()

        self.save()
Esempio n. 30
0
    def save(self, path_info):
        """Save a local file or directory to the cache.

        Returns:
            Whatever ``self._save_dir`` (for directories) or
            ``self._save_file`` (for anything else) returns.

        Raises:
            NotImplementedError: if ``path_info['scheme']`` is not
                ``'local'``.
        """
        if path_info['scheme'] != 'local':
            raise NotImplementedError

        path = path_info['path']

        logger.info(
            "Saving '{}' to cache '{}'.".format(
                os.path.relpath(path), os.path.relpath(self.cache_dir)))

        saver = self._save_dir if os.path.isdir(path) else self._save_file
        return saver(path_info)