コード例 #1
0
ファイル: gc.py プロジェクト: zjj2wry/dvc
    def run(self):
        """Warn about what `gc` is about to delete, confirm, then collect.

        Logs a warning naming the git scope (current branch, all branches,
        all tags, or both) and the projects whose cache is kept. Unless
        `self.args.force` is set, asks the user for confirmation first.

        Returns:
            int: 1 if the user declined the confirmation, 0 after
            `self.project.gc` has been called.
        """
        args = self.args

        # Pick the description of the git scope whose cache is preserved.
        if args.all_branches:
            if args.all_tags:
                scope = 'all git branches and all git tags'
            else:
                scope = 'all git branches'
        elif args.all_tags:
            scope = 'all git tags'
        else:
            scope = 'the current git branch'

        pieces = [
            'this will remove all cache except the cache that is used in ',
            scope,
        ]

        if args.projects:
            pieces.append(' of the current and the following projects:')
            pieces.extend('\n  - %s' % os.path.abspath(path)
                          for path in args.projects)
        else:
            pieces.append(' of the current project.')

        logger.warning(''.join(pieces))

        # `force` short-circuits the prompt, so it is never shown when set.
        confirmed = args.force or prompt.confirm(
            'Are you sure you want to proceed?')
        if not confirmed:
            return 1

        self.project.gc(
            all_branches=args.all_branches,
            all_tags=args.all_tags,
            cloud=args.cloud,
            remote=args.remote,
            force=args.force,
            jobs=args.jobs,
            projects=args.projects,
        )
        return 0
コード例 #2
0
ファイル: base.py プロジェクト: rpip/dvc
    def default_targets(self):
        """Return the fallback target list for `dvc repro` and `dvc pipeline`.

        Warns that the default stage file is being assumed and returns a
        one-element list containing `Stage.STAGE_FILE`.
        """
        from dvc.stage import Stage

        stage_file = Stage.STAGE_FILE
        logger.warning("assuming default target '{}'.".format(stage_file))
        return [stage_file]
コード例 #3
0
ファイル: project.py プロジェクト: raymondSeger/dvc
    def _collect_used_cache(self,
                            out,
                            branch=None,
                            remote=None,
                            force=False,
                            jobs=None):
        """Collect the cache entries referenced by a single output.

        Args:
            out: output to inspect.
            branch: value stored under the "branch" key of the entry.
            remote: forwarded to `_collect_dir_cache`.
            force: forwarded to `_collect_dir_cache`.
            jobs: forwarded to `_collect_dir_cache`.

        Returns:
            list: empty when the output has no checksum info or does not use
            the cache; otherwise a one-element list with the output's dumped
            info, or, when the local cache entry is a directory cache, the
            result of `_collect_dir_cache`.
        """
        if not out.info:
            logger.warning("Output '{}'({}) is missing version "
                           "info. Cache for it will not be collected. "
                           "Use dvc repro to get your pipeline up to "
                           "date.".format(out, out.stage))
            return []

        if not out.use_cache:
            return []

        entry = out.dumpd()
        entry["branch"] = branch

        # Only local outputs can expand into a directory cache listing.
        if out.scheme == "local":
            checksum = entry[out.remote.PARAM_CHECKSUM]
            cache_path = self.cache.local.get(checksum)
            if out.remote.is_dir_cache(cache_path):
                return self._collect_dir_cache(out,
                                               branch=branch,
                                               remote=remote,
                                               force=force,
                                               jobs=jobs)

        return [entry]
コード例 #4
0
ファイル: base.py プロジェクト: ml-lab/dvc
    def supported(cls, config):
        """Tell whether this remote class can handle the URL in `config`.

        The URL must match `cls.match` and every value in `cls.REQUIRES`
        must be truthy. When the URL matches but dependencies are missing,
        a warning with installation hints is logged.

        Returns:
            bool: True only if the URL matches and all dependencies are
            available.
        """
        url = config[Config.SECTION_REMOTE_URL]
        url_ok = cls.match(url) is not None
        deps_ok = all(cls.REQUIRES.values())

        if not url_ok:
            return False
        if deps_ok:
            return True

        # The URL is ours, but some dependency is missing: explain how to
        # install it.
        missing = [
            name for name, module in cls.REQUIRES.items() if module is None
        ]
        template = (
            "URL '{}' is supported but requires these missing "
            "dependencies: {}. If you have installed dvc using pip, "
            "choose one of these options to proceed: \n"
            "\n"
            "    1) Install specific missing dependencies:\n"
            "        pip install {}\n"
            "    2) Install dvc package that includes those missing "
            "dependencies: \n"
            "        pip install dvc[{}]\n"
            "    3) Install dvc package with all possible "
            "dependencies included: \n"
            "        pip install dvc[all]\n"
            "\n"
            "If you have installed dvc from a binary package and you "
            "are still seeing this message, please report it to us "
            "using https://github.com/iterative/dvc/issues. Thank you!"
        )
        logger.warning(
            template.format(url, missing, " ".join(missing), cls.scheme))
        return False
コード例 #5
0
def _get_diff_outs(self, diff_dct):
    """Build a per-path comparison of the outputs of the two diffed trees.

    `self.tree` is switched to the A tree and then to the B tree (and left
    on the B tree) so that `self.stages()` is evaluated against each.

    Returns:
        dict: maps each output path (as a string) to a dict holding the A
        and B outputs plus the "is new", "deleted" and "is dir" flags.
        Paths for which `_check_local_cache` returns a truthy value for
        either side are skipped.
    """

    def outs_by_path(tree):
        # Point the repo at `tree` and index every stage output by str(out).
        self.tree = tree
        return {
            str(out): out for stage in self.stages() for out in stage.outs
        }

    a_outs = outs_by_path(diff_dct[DIFF_A_TREE])
    b_outs = outs_by_path(diff_dct[DIFF_B_TREE])

    all_paths = set(a_outs)
    all_paths.update(b_outs)

    results = {}
    non_local_cache = []
    for path in all_paths:
        a_out = a_outs.get(path)
        b_out = b_outs.get(path)

        # Both checks always run (no short-circuit), as in the original;
        # presumably each may record the output in `non_local_cache`.
        a_skipped = _check_local_cache(a_out, non_local_cache)
        b_skipped = _check_local_cache(b_out, non_local_cache)

        # skip files/directories with non-local cache for now
        if a_skipped or b_skipped:
            continue

        results[path] = {
            DIFF_A_OUTPUT: a_out,
            DIFF_B_OUTPUT: b_out,
            DIFF_IS_NEW: path not in a_outs,
            DIFF_DELETED: path not in b_outs,
            DIFF_IS_DIR: _is_dir(path, a_outs, b_outs),
        }

    if non_local_cache:
        logger.warning(
            "Diff is not supported for non-local outputs. Ignoring: {}".format(
                non_local_cache))

    return results
コード例 #6
0
ファイル: gc.py プロジェクト: guysmoilov/dvc
    def run(self):
        """Warn about what `gc` is about to delete, confirm, then collect.

        Logs a warning naming the git scope (current branch, all branches,
        all tags, or both) and the repos whose cache is kept. Unless
        `self.args.force` is set, asks the user for confirmation first.

        Returns:
            int: 1 if the user declined the confirmation, 0 after
            `self.repo.gc` has been called.
        """
        args = self.args

        # Pick the description of the git scope whose cache is preserved.
        if args.all_branches:
            if args.all_tags:
                scope = "all git branches and all git tags"
            else:
                scope = "all git branches"
        elif args.all_tags:
            scope = "all git tags"
        else:
            scope = "the current git branch"

        pieces = [
            "this will remove all cache except the cache that is used in ",
            scope,
        ]

        if args.repos:
            pieces.append(" of the current and the following repos:")
            pieces.extend(
                "\n  - %s" % os.path.abspath(path) for path in args.repos
            )
        else:
            pieces.append(" of the current repo.")

        logger.warning("".join(pieces))

        # `force` short-circuits the prompt, so it is never shown when set.
        confirmed = args.force or prompt.confirm(
            "Are you sure you want to proceed?"
        )
        if not confirmed:
            return 1

        self.repo.gc(
            all_branches=args.all_branches,
            all_tags=args.all_tags,
            cloud=args.cloud,
            remote=args.remote,
            force=args.force,
            jobs=args.jobs,
            repos=args.repos,
        )
        return 0
コード例 #7
0
ファイル: data_cloud.py プロジェクト: siddygups/dvc
    def _init_cloud(self, cloud_config, cloud_type):
        """Instantiate `cloud_type` for this repo with `cloud_config`.

        Logs a warning first when the core config section still carries a
        value for `Config.SECTION_CORE_STORAGEPATH`.
        """
        if self._core.get(Config.SECTION_CORE_STORAGEPATH):
            logger.warning("using obsoleted config format. Consider updating.")

        return cloud_type(self.repo, cloud_config)
コード例 #8
0
ファイル: local.py プロジェクト: ml-lab/dvc
    def save(self):
        """Save this output into the project's local cache.

        Outputs that do not use the cache are only passed to the parent
        `save` and metric verification. Otherwise the path must exist and be
        a file or directory; an empty file/directory only triggers a
        warning. Unchanged outputs are skipped.

        Raises:
            DoesNotExistError: the path does not exist.
            IsNotFileOrDirError: the path is neither a file nor a directory.
            OutputAlreadyTrackedError: a local output is already tracked by
                the SCM.
        """
        if not self.use_cache:
            super(OutputLOCAL, self).save()
            self._verify_metric()
            logger.info("Output '{}' doesn't use cache. "
                        "Skipping saving.".format(self.rel_path))
            return

        path = self.path

        if not os.path.exists(path):
            raise self.DoesNotExistError(self.rel_path)

        is_file = os.path.isfile(path)
        if not is_file and not os.path.isdir(path):
            raise self.IsNotFileOrDirError(self.rel_path)

        # Past the check above, anything that is not a file is a directory.
        if is_file:
            is_empty = os.path.getsize(path) == 0
        else:
            is_empty = len(os.listdir(path)) == 0

        if is_empty:
            logger.warning(
                "file/directory '{}' is empty.".format(self.rel_path))

        if not self.changed():
            logger.info("Output '{}' didn't change. "
                        "Skipping saving.".format(self.rel_path))
            return

        if self.is_local:
            scm = self.project.scm
            if scm.is_tracked(path):
                raise OutputAlreadyTrackedError(self.rel_path)

            if self.use_cache:
                scm.ignore(path)

        self.info = self.project.cache.local.save(self.path_info)
コード例 #9
0
    def checkout(self, path_info, checksum_info, force=False):
        """Check out `path_info` from the cache entry named in `checksum_info`.

        Nothing is done when there is no checksum or the data did not
        change. When the cache entry itself is reported as changed by
        `changed_cache`, the target is removed instead of being created.

        Raises:
            NotImplementedError: the path scheme is neither "", "local" nor
                this remote's own scheme.
        """
        if path_info["scheme"] not in ("", "local", self.scheme):
            raise NotImplementedError

        target = str(path_info)
        checksum = checksum_info.get(self.PARAM_CHECKSUM)

        if not checksum:
            logger.info("No checksum info for '{}'.".format(target))
            return

        if not self.changed(path_info, checksum_info):
            logger.info("Data '{}' didn't change.".format(target))
            return

        if self.changed_cache(checksum):
            logger.warning(
                "Cache '{}' not found. File '{}' won't be created.".format(
                    checksum, target))
            self.safe_remove(path_info, force=force)
            return

        logger.info(
            "Checking out '{}' with cache '{}'.".format(target, checksum))
        self.do_checkout(path_info, checksum, force=force)
コード例 #10
0
    def test_warning(self, caplog):
        """A warning is rendered as a yellow ``WARNING`` prefix and the text."""
        expected = "{yellow}WARNING{nc}: message".format(**colors)

        with caplog.at_level(logging.INFO, logger="dvc"):
            logger.warning("message")
            rendered = formatter.format(caplog.records[0])

            assert expected == rendered
コード例 #11
0
def checkout(self, target=None, with_deps=False, force=False, recursive=False):
    """Check out the outputs of the stages selected by `target`.

    Unused links of all stages are cleaned up first. Locked stages are still
    checked out, but a warning is logged for each.

    Raises:
        CheckoutErrorSuggestGit: `target` was given but stage collection
            failed with a missing or badly named stage file.
    """
    from dvc.stage import StageFileDoesNotExistError, StageFileBadNameError

    all_stages = self.stages()

    try:
        stages = self.collect(target, with_deps=with_deps, recursive=recursive)
    except (StageFileDoesNotExistError, StageFileBadNameError) as exc:
        if target:
            raise CheckoutErrorSuggestGit(target, exc)
        # Without a target there is nothing to suggest; re-raise as is.
        raise

    with self.state:
        _cleanup_unused_links(self, all_stages)
        progress_callback = get_progress_callback(stages)

        for stage in stages:
            if stage.locked:
                locked_msg = (
                    "DVC file '{path}' is locked. Its dependencies are"
                    " not going to be checked out."
                )
                logger.warning(locked_msg.format(path=stage.relpath))

            stage.checkout(force=force, progress_callback=progress_callback)

        if progress_callback:
            progress_callback.finish("Checkout finished!")
コード例 #12
0
    def upload(self, from_infos, to_infos, names=None):
        """Upload local files to this remote through `self.blob_service`.

        Uploads are best-effort: a failed upload is logged as a warning
        (with the exception attached) and the remaining files are still
        processed.

        Args:
            from_infos: source path infos; each must have scheme 'local'.
            to_infos: destination path infos; each must use this remote's
                scheme and carry 'bucket' and 'path'.
            names: optional display names, one per file. A falsy name falls
                back to the basename of the source path.

        Raises:
            NotImplementedError: a destination scheme differs from
                `self.scheme`, or a source scheme is not 'local'.
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info['scheme'] != self.scheme:
                raise NotImplementedError

            if from_info['scheme'] != 'local':
                raise NotImplementedError

            bucket = to_info['bucket']
            path = to_info['path']
            local_path = from_info['path']

            logger.debug("Uploading '{}' to '{}/{}'".format(
                local_path, bucket, path))

            if not name:
                name = os.path.basename(local_path)

            cb = Callback(name)

            try:
                self.blob_service.create_blob_from_path(bucket,
                                                        path,
                                                        local_path,
                                                        progress_callback=cb)
            except Exception:
                # Deliberately best-effort: keep going with the next file,
                # but attach the exception so the cause is not lost.
                msg = "failed to upload '{}'".format(local_path)
                logger.warning(msg, exc_info=True)
            else:
                progress.finish_target(name)
コード例 #13
0
ファイル: project.py プロジェクト: raymondSeger/dvc
    def checkout(self,
                 target=None,
                 with_deps=False,
                 force=False,
                 recursive=False):
        """Check out the outputs of the stages selected by `target`.

        With a non-recursive `target`, the stages come from `_collect` and
        link cleanup covers all active stages. Otherwise the active stages
        (filtered by `target`, if any) are used for both.

        Raises:
            DvcException: a non-recursive `target` names a missing or badly
                named stage file; the message suggests `git checkout`.
        """
        if recursive or not target:
            all_stages = self.active_stages(target)
            stages = all_stages
        else:
            from dvc.stage import (
                StageFileDoesNotExistError,
                StageFileBadNameError,
            )

            all_stages = self.active_stages()
            try:
                stages = self._collect(target, with_deps=with_deps)
            except (StageFileDoesNotExistError, StageFileBadNameError) as exc:
                hint = " Did you mean 'git checkout {}'?".format(target)
                raise DvcException(str(exc) + hint)

        with self.state:
            self._cleanup_unused_links(all_stages)

            for stage in stages:
                if stage.locked:
                    locked_msg = (
                        "DVC file '{path}' is locked. Its dependencies are"
                        " not going to be checked out."
                    )
                    logger.warning(locked_msg.format(path=stage.relpath))

                stage.checkout(force=force)
コード例 #14
0
    def save(self):
        """Record checksum info for this output/dependency.

        The path must exist and be a file or directory; an empty
        file/directory only triggers a warning. Entries that do not use the
        cache just get their info saved and metrics verified. Cached outputs
        are skipped when unchanged, and local ones are added to the SCM
        ignore list.

        Raises:
            DoesNotExistError: the path does not exist.
            IsNotFileOrDirError: the path is neither a file nor a directory.
            OutputAlreadyTrackedError: a local output is already tracked by
                the SCM.
        """
        path = self.path

        if not os.path.exists(path):
            raise self.DoesNotExistError(self.rel_path)

        is_file = os.path.isfile(path)
        if not is_file and not os.path.isdir(path):
            raise self.IsNotFileOrDirError(self.rel_path)

        # Past the check above, anything that is not a file is a directory.
        if is_file:
            is_empty = os.path.getsize(path) == 0
        else:
            is_empty = len(os.listdir(path)) == 0

        if is_empty:
            logger.warning(
                "file/directory '{}' is empty.".format(self.rel_path))

        if not self.use_cache:
            self.info = self.remote.save_info(self.path_info)
            self.verify_metric()
            if not self.IS_DEPENDENCY:
                logger.info("Output '{}' doesn't use cache. "
                            "Skipping saving.".format(self.rel_path))
            return

        assert not self.IS_DEPENDENCY

        if not self.changed():
            logger.info("Output '{}' didn't change. "
                        "Skipping saving.".format(self.rel_path))
            return

        if self.is_local:
            scm = self.repo.scm
            if scm.is_tracked(path):
                raise OutputAlreadyTrackedError(self.rel_path)

            if self.use_cache:
                scm.ignore(path)

        self.info = self.remote.save_info(self.path_info)
コード例 #15
0
ファイル: stage.py プロジェクト: yfarjoun/dvc
    def _changed_outs(self):
        """Return True (and warn) as soon as one output reports a change.

        Outputs after the first changed one are not inspected.
        """
        first_changed = next((out for out in self.outs if out.changed()), None)
        if first_changed is None:
            return False

        logger.warning("Output '{out}' of '{stage}' changed.".format(
            out=first_changed, stage=self.relpath))
        return True
コード例 #16
0
    def do_checkout(self, path_info, checksum, force=False):
        """Copy the cache entry for `checksum` to `path_info`.

        Existing data at the destination is removed first (with a warning),
        passing `force` through to `safe_remove`.
        """
        if self.exists(path_info):
            logger.warning(
                "data '{}' exists. Removing before checkout.".format(
                    str(path_info)))
            self.safe_remove(path_info, force=force)

        self.copy(self.checksum_to_path_info(checksum), path_info)
コード例 #17
0
ファイル: test_logger.py プロジェクト: zeta1999/dvc
def test_logging_debug_with_datetime(caplog, dt):
    """At DEBUG level every formatted record carries the `dt` string."""
    with caplog.at_level(logging.DEBUG, logger="dvc"):
        # Each message text equals its level name so the loop below can
        # check both with one comparison.
        logger.warning("WARNING")
        logger.debug("DEBUG")
        logger.error("ERROR")

        for record in caplog.records:
            rendered = formatter.format(record)
            assert dt in rendered
            assert record.levelname == record.message
コード例 #18
0
    def _changed_outs(self):
        """Return True (and warn) as soon as one output reports a change.

        Outputs after the first changed one are not inspected.
        """
        for out in self.outs:
            if out.changed():
                logger.warning(
                    "Output '{}' of '{}' changed.".format(out, self.relpath))
                return True

        return False
コード例 #19
0
ファイル: local.py プロジェクト: tdeboissiere/dvc
 def changed_cache_file(self, md5):
     """Tell whether the cache file for `md5` is missing or altered.

     When the state reports a change and the file still exists, it is
     treated as corrupted: a warning is logged and the file is removed.

     Returns:
         bool: True if the state reports the cache file as changed.
     """
     cache_path = self.get(md5)

     if not self.state.changed(cache_path, md5=md5):
         return False

     if os.path.exists(cache_path):
         logger.warning(
             "Corrupted cache file {}.".format(os.path.relpath(cache_path)))
         remove(cache_path)

     return True
コード例 #20
0
ファイル: stage.py プロジェクト: yfarjoun/dvc
    def _warn_if_fish(self, executable):  # pragma: no cover
        """Warn when `executable` resolves to a binary named "fish".

        Does nothing when `executable` is None.
        """
        if executable is None:
            return

        # Resolve symlinks first so an alias pointing at fish is caught too.
        shell_name = os.path.basename(os.path.realpath(executable))
        if shell_name == "fish":
            logger.warning(
                "DVC detected that you are using fish as your default "
                "shell. Be aware that it might cause problems by overwriting "
                "your current environment variables with values defined "
                "in '.fishrc', which might affect your command. See "
                "https://github.com/iterative/dvc/issues/1307. ")
コード例 #21
0
ファイル: base.py プロジェクト: databill86/dvc
    def do_checkout(self, output, force=False, progress_callback=None):
        """Copy the cache entry recorded in `output.info` to the output path.

        Existing data at the destination is removed first (with a warning),
        passing `force` through to `safe_remove`. `progress_callback` is
        accepted but not used here.
        """
        destination = output.path_info
        checksum = output.info.get(self.PARAM_CHECKSUM)

        if self.exists(destination):
            logger.warning(
                "data '{}' exists. Removing before checkout.".format(
                    str(destination)))
            self.safe_remove(destination, force=force)

        self.copy(self.checksum_to_path_info(checksum), destination)
コード例 #22
0
ファイル: remove.py プロジェクト: rpip/dvc
def remove(self, tag, target=None, with_deps=False, recursive=False):
    """Delete `tag` from the outputs of the collected stages.

    Outputs that do not carry the tag are reported with a warning. A stage
    is dumped only if at least one of its outputs lost the tag.
    """
    for stage in self.collect(
        target, with_deps=with_deps, recursive=recursive
    ):
        removed_any = False

        for out in stage.outs:
            if tag in out.tags:
                del out.tags[tag]
                removed_any = True
            else:
                logger.warning("tag '{}' not found for '{}'".format(tag, out))

        if removed_any:
            stage.dump()
コード例 #23
0
ファイル: add.py プロジェクト: rpip/dvc
def add(self, tag, target=None, with_deps=False, recursive=False):
    """Attach `tag` to the outputs of the collected stages.

    The tag stores a copy of the output's current checksum info. Outputs
    without checksum info are reported with a warning. A stage is dumped
    only if at least one of its outputs was tagged.
    """
    for stage in self.collect(
        target, with_deps=with_deps, recursive=recursive
    ):
        tagged_any = False

        for out in stage.outs:
            if out.info:
                out.tags[tag] = out.info.copy()
                tagged_any = True
            else:
                logger.warning("missing checksum info for '{}'".format(out))

        if tagged_any:
            stage.dump()
コード例 #24
0
ファイル: data_cloud.py プロジェクト: siddygups/dvc
    def _cloud(self):
        """Return the configured cloud object, or None if none is configured.

        A non-empty core remote setting wins. Otherwise the legacy core
        cloud setting, if present, is initialised through `_init_compat`
        with a warning.
        """
        remote_name = self._core.get(Config.SECTION_CORE_REMOTE, "")
        if remote_name != "":
            return self._init_remote(remote_name)

        if not self._core.get(Config.SECTION_CORE_CLOUD, None):
            return None

        # backward compatibility
        logger.warning("using obsoleted config format. Consider updating.")
        return self._init_compat()
コード例 #25
0
ファイル: __init__.py プロジェクト: databill86/dvc
    def used_cache(
        self,
        target=None,
        all_branches=False,
        active=True,
        with_deps=False,
        all_tags=False,
        remote=None,
        force=False,
        jobs=None,
        recursive=False,
    ):
        """Collect the cache entries used by the selected stages.

        For every branch yielded by `self.scm.brancher`, the stages are
        selected again (by `target`, else active stages, else all stages)
        and the used cache of each stage output is accumulated per scheme.

        Returns:
            dict: scheme name ("local", "s3", "gs", "hdfs", "ssh",
            "azure") -> list of entries returned by `_collect_used_cache`.
        """
        cache = {
            scheme: []
            for scheme in ("local", "s3", "gs", "hdfs", "ssh", "azure")
        }

        # The locked-stage warning only applies to the "active stages,
        # no explicit target" selection.
        warn_about_locked = active and not target

        branches = self.scm.brancher(
            all_branches=all_branches, all_tags=all_tags
        )
        for branch in branches:
            if not target:
                stages = self.active_stages() if active else self.stages()
            elif recursive:
                stages = self.stages(target)
            else:
                stages = self.collect(target, with_deps=with_deps)

            for stage in stages:
                if warn_about_locked and stage.locked:
                    locked_msg = (
                        "DVC file '{path}' is locked. Its dependencies are"
                        " not going to be pushed/pulled/fetched."
                    )
                    logger.warning(locked_msg.format(path=stage.relpath))

                for out in stage.outs:
                    cache[out.path_info["scheme"]] += self._collect_used_cache(
                        out,
                        branch=branch,
                        remote=remote,
                        force=force,
                        jobs=jobs,
                    )

        return cache
コード例 #26
0
    def _changed_outs(self):
        """Return True (and warn) for the first output with a non-empty status.

        The warning quotes the status entry keyed by `str(out)`. Outputs
        after the first changed one are not inspected.
        """
        for out in self.outs:
            status = out.status()
            if not status:
                continue

            template = (
                "Output '{out}' of '{stage}' changed because it is "
                "'{status}'"
            )
            logger.warning(template.format(out=out,
                                           stage=self.relpath,
                                           status=status[str(out)]))
            return True

        return False
コード例 #27
0
    def dump(self):
        """Flush bookkeeping to the state database and close it.

        Updates the stored row count, prunes the oldest rows when that count
        exceeds ``self.row_limit``, commits, then closes the cursor and the
        connection and resets ``self.database``, ``self.cursor`` and
        ``self.inserts``. Requires an open database.
        """
        assert self.database is not None

        # Read the stored row count and add self.inserts to it
        # (presumably the number of rows inserted since it was stored).
        cmd = "SELECT count from {} WHERE rowid={}"
        self._execute(cmd.format(self.STATE_INFO_TABLE, self.STATE_INFO_ROW))
        ret = self._fetchall()
        assert len(ret) == 1
        assert len(ret[0]) == 1
        count = self._from_sqlite(ret[0][0]) + self.inserts

        if count > self.row_limit:
            msg = "cleaning up state, this might take a while."
            logger.warning(msg)

            # Remove the overflow plus row_cleanup_quota percent of the
            # limit, so the table ends up below the limit after cleanup.
            delete = count - self.row_limit
            delete += int(self.row_limit * (self.row_cleanup_quota / 100.0))
            # Oldest rows first (ascending timestamp). Rows are matched by
            # timestamp value, so rows sharing a selected timestamp are
            # deleted as well.
            cmd = (
                "DELETE FROM {} WHERE timestamp IN ("
                "SELECT timestamp FROM {} ORDER BY timestamp ASC LIMIT {});"
            )
            self._execute(
                cmd.format(self.STATE_TABLE, self.STATE_TABLE, delete)
            )

            self._vacuum()

            # Recount from the table itself instead of trusting arithmetic.
            cmd = "SELECT COUNT(*) FROM {}"

            self._execute(cmd.format(self.STATE_TABLE))
            ret = self._fetchall()
            assert len(ret) == 1
            assert len(ret[0]) == 1
            count = ret[0][0]

        # Persist the (possibly recounted) row count for the next session.
        cmd = "UPDATE {} SET count = {} WHERE rowid = {}"
        self._execute(
            cmd.format(
                self.STATE_INFO_TABLE,
                self._to_sqlite(count),
                self.STATE_INFO_ROW,
            )
        )

        self._update_cache_directory_state()

        # Commit everything, then tear down the connection and reset the
        # per-session fields.
        self.database.commit()
        self.cursor.close()
        self.database.close()
        self.database = None
        self.cursor = None
        self.inserts = 0
コード例 #28
0
ファイル: local.py プロジェクト: evander-dacosta/dvc
 def _log_missing_caches(self, checksum_info_dict):
     """Warn about every entry whose status is `STATUS_MISSING`.

     `checksum_info_dict` maps an md5 to an info dict with at least the
     "status" and "name" keys. Nothing is logged when no entry is missing.
     """
     missing_lines = []
     for md5, info in checksum_info_dict.items():
         if info["status"] == STATUS_MISSING:
             missing_lines.append(
                 "\nname: {}, md5: {}".format(info["name"], md5))

     if not missing_lines:
         return

     logger.warning(
         "Some of the cache files do not exist neither locally "
         "nor on remote. Missing cache files: {}".format(
             "".join(missing_lines)))
コード例 #29
0
ファイル: temp_git_repo.py プロジェクト: rpip/dvc
    def persist_to(self, module_dir, parent_repo):
        """Move this temporary repo's cache and clone into their final place.

        Every entry of the temporary repo's local cache directory is handed
        to `_move_all_cache_files`, then the cloned directory is moved to
        `module_dir` and the state is reset.

        Raises:
            TempRepoException: the state is not set.
        """
        if not self.is_state_set:
            raise TempRepoException(self, "cannot persist")

        cache_dir = self.repo.cache.local.url

        for entry in os.listdir(cache_dir):
            if len(entry) != 2:
                # NOTE(review): unexpected entries are warned about but
                # still moved below - confirm this is intended.
                warning = "wrong dir format in cache {}: dir {}".format(
                    cache_dir, entry)
                logger.warning(warning)
            self._move_all_cache_files(parent_repo, entry, cache_dir)

        shutil.move(self._cloned_tmp_dir, module_dir)
        self._reset_state()
コード例 #30
0
ファイル: local.py プロジェクト: ml-lab/dvc
    def save(self):
        """Record checksum info for this path via `self.remote.save_info`.

        The path must exist and be a file or directory; an empty
        file/directory only triggers a warning.

        Raises:
            DoesNotExistError: `self.exists` is falsy.
            IsNotFileOrDirError: the path is neither a file nor a directory.
        """
        if not self.exists:
            raise self.DoesNotExistError(self.rel_path)

        path = self.path
        is_file = os.path.isfile(path)

        if not is_file and not os.path.isdir(path):  # pragma: no cover
            raise self.IsNotFileOrDirError(self.rel_path)

        # Past the check above, anything that is not a file is a directory.
        if is_file:
            is_empty = os.path.getsize(path) == 0
        else:
            is_empty = len(os.listdir(path)) == 0

        if is_empty:
            logger.warning(
                "file/directory '{}' is empty.".format(self.rel_path))

        self.info = self.remote.save_info(self.path_info)