Code example #1
File: test_show.py  Project: vishalbelsare/dvc
def test_show_running_workspace(tmp_dir, scm, dvc, exp_stage, capsys):
    pid_dir = os.path.join(dvc.tmp_dir, dvc.experiments.EXEC_PID_DIR)
    makedirs(pid_dir, True)
    info = ExecutorInfo(None, None, None, BaseExecutor.DEFAULT_LOCATION)
    pidfile = os.path.join(pid_dir, f"workspace{BaseExecutor.PIDFILE_EXT}")
    (tmp_dir / pidfile).dump(info.to_dict())

    assert dvc.experiments.show()["workspace"] == {
        "baseline": {
            "data": {
                "metrics": {"metrics.yaml": {"data": {"foo": 1}}},
                "params": {"params.yaml": {"data": {"foo": 1}}},
                "queued": False,
                "running": True,
                "executor": info.location,
                "timestamp": None,
            }
        }
    }

    capsys.readouterr()
    assert main(["exp", "show", "--no-pager"]) == 0
    cap = capsys.readouterr()
    assert "Running" in cap.out
    assert info.location in cap.out
Code example #2
File: index.py  Project: ush98/dvc
    def load(self):
        """(Re)load this index database."""
        retries = 1
        while True:
            assert self.database is None
            assert self.cursor is None

            empty = not os.path.isfile(self.path)
            makedirs(os.path.dirname(self.path), exist_ok=True)
            self.database = _connect_sqlite(self.path, {"nolock": 1})
            self.cursor = self.database.cursor()

            try:
                self._prepare_db(empty=empty)
                return
            except sqlite3.DatabaseError:
                self.cursor.close()
                self.database.close()
                self.database = None
                self.cursor = None
                if retries > 0:
                    os.unlink(self.path)
                    retries -= 1
                else:
                    raise
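
The load() above depends on a _connect_sqlite helper that is not shown here. A minimal sketch of such a helper, assuming the extra options are passed as SQLite URI query parameters (nolock=1 is a standard SQLite URI parameter that disables file locking); this is an illustration, not necessarily dvc's exact implementation:

import os
import sqlite3
from urllib.parse import urlencode
from urllib.request import pathname2url

def _connect_sqlite(path, params):
    # Use a "file:" URI so that options like nolock=1 can be passed as
    # query parameters; uri=True is required for sqlite3 to parse them.
    uri = f"file:{pathname2url(os.path.abspath(path))}?{urlencode(params)}"
    return sqlite3.connect(uri, uri=True)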
Code example #3
File: __init__.py  Project: johnnychen94/dvc
    def _get_repo_dirs(
        self,
        root_dir: str = None,
        scm: Base = None,
        rev: str = None,
        uninitialized: bool = False,
    ):
        assert bool(scm) == bool(rev)

        from dvc.scm import SCM
        from dvc.scm.git import Git
        from dvc.utils.fs import makedirs

        dvc_dir = None
        tmp_dir = None
        try:
            tree = scm.get_tree(rev) if isinstance(scm, Git) and rev else None
            root_dir = self.find_root(root_dir, tree)
            dvc_dir = os.path.join(root_dir, self.DVC_DIR)
            tmp_dir = os.path.join(dvc_dir, "tmp")
            makedirs(tmp_dir, exist_ok=True)
        except NotDvcRepoError:
            if not uninitialized:
                raise

            try:
                scm = SCM(root_dir or os.curdir)
            except (SCMError, InvalidGitRepositoryError):
                scm = SCM(os.curdir, no_scm=True)

            assert isinstance(scm, Base)
            root_dir = scm.root_dir

        return root_dir, dvc_dir, tmp_dir
Code example #4
File: __init__.py  Project: gbiagomba/dvc
    def __init__(self, root_dir=None, scm=None, rev=None):
        from dvc.state import State, StateNoop
        from dvc.lock import make_lock
        from dvc.scm import SCM
        from dvc.cache import Cache
        from dvc.data_cloud import DataCloud
        from dvc.repo.metrics import Metrics
        from dvc.repo.plots import Plots
        from dvc.repo.params import Params
        from dvc.scm.tree import WorkingTree
        from dvc.utils.fs import makedirs
        from dvc.stage.cache import StageCache

        if scm:
            # use GitTree instead of WorkingTree as default repo tree instance
            tree = scm.get_tree(rev)
            self.root_dir = self.find_root(root_dir, tree)
            self.scm = scm
            self.tree = tree
            self.state = StateNoop()
        else:
            root_dir = self.find_root(root_dir)
            self.root_dir = os.path.abspath(os.path.realpath(root_dir))
            self.tree = WorkingTree(self.root_dir)

        self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)
        self.config = Config(self.dvc_dir, tree=self.tree)

        if not scm:
            no_scm = self.config["core"].get("no_scm", False)
            self.scm = SCM(self.root_dir, no_scm=no_scm)

        self.tmp_dir = os.path.join(self.dvc_dir, "tmp")
        self.index_dir = os.path.join(self.tmp_dir, "index")
        makedirs(self.index_dir, exist_ok=True)

        hardlink_lock = self.config["core"].get("hardlink_lock", False)
        self.lock = make_lock(
            os.path.join(self.tmp_dir, "lock"),
            tmp_dir=self.tmp_dir,
            hardlink_lock=hardlink_lock,
            friendly=True,
        )

        self.cache = Cache(self)
        self.cloud = DataCloud(self)

        if not scm:
            # NOTE: storing state and link_state in the repository itself to
            # avoid any possible state corruption in 'shared cache dir'
            # scenario.
            self.state = State(self.cache.local)

        self.stage_cache = StageCache(self)

        self.metrics = Metrics(self)
        self.plots = Plots(self)
        self.params = Params(self)

        self._ignore()
Code example #5
    def _get_database_dir(self, db_name):
        # NOTE: by default, store SQLite-based remote indexes and state's
        # `links` and `md5s` caches in the repository itself to avoid any
        # possible state corruption in 'shared cache dir' scenario, but allow
        # user to override this through config when, say, the repository is
        # located on a mounted volume — see
        # https://github.com/iterative/dvc/issues/4420
        base_db_dir = self.config.get(db_name, {}).get("dir", None)
        if not base_db_dir:
            return self.tmp_dir

        import hashlib

        from dvc.utils.fs import makedirs

        root_dir_hash = hashlib.sha224(
            self.root_dir.encode("utf-8")
        ).hexdigest()

        db_dir = os.path.join(
            base_db_dir,
            self.DVC_DIR,
            f"{os.path.basename(self.root_dir)}-{root_dir_hash[0:7]}",
        )

        makedirs(db_dir, exist_ok=True)
        return db_dir
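
For illustration, with a hypothetical repository root and config value, the directory naming used above works out like this (all path values here are made up):

import hashlib
import os

root_dir = "/home/user/project"      # hypothetical repository root
base_db_dir = "/mnt/nfs/dvc-db"      # hypothetical value from config
suffix = hashlib.sha224(root_dir.encode("utf-8")).hexdigest()[0:7]
db_dir = os.path.join(base_db_dir, ".dvc", f"{os.path.basename(root_dir)}-{suffix}")
# -> /mnt/nfs/dvc-db/.dvc/project-<first 7 hex chars of sha224(root_dir)>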
Code example #6
def test_subrepos_are_ignored(tmp_dir, erepo_dir):
    subrepo = erepo_dir / "dir" / "subrepo"
    make_subrepo(subrepo, erepo_dir.scm)
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("dir/foo", "foo", commit="foo")
        erepo_dir.scm_gen("dir/bar", "bar", commit="bar")

    with subrepo.chdir():
        subrepo.dvc_gen({"file": "file"}, commit="add files on subrepo")

    with external_repo(os.fspath(erepo_dir)) as repo:
        repo.get_external("dir", "out")
        expected_files = {"foo": "foo", "bar": "bar", ".gitignore": "/foo\n"}
        assert (tmp_dir / "out").read_text() == expected_files

        expected_hash = HashInfo("md5", "e1d9e8eae5374860ae025ec84cfd85c7.dir")
        assert (
            repo.repo_tree.get_hash(
                os.path.join(repo.root_dir, "dir"), follow_subrepos=False
            )
            == expected_hash
        )

        # clear cache to test `fetch_external` again
        cache_dir = tmp_dir / repo.cache.local.cache_dir
        remove(cache_dir)
        makedirs(cache_dir)

        assert repo.fetch_external(["dir"]) == (
            len(expected_files),
            0,
            [expected_hash],
        )
Code example #7
    def dump_json(self, filename: str):
        from dvc.utils.fs import makedirs
        from dvc.utils.serialize import modify_json

        makedirs(os.path.dirname(filename), exist_ok=True)
        with modify_json(filename) as d:
            d.update(self.asdict())
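
dump_json() relies on modify_json from dvc.utils.serialize. A rough sketch of what such a read-modify-write context manager can look like (illustrative only; the real helper may handle encodings and atomic writes differently):

import json
import os
from contextlib import contextmanager

@contextmanager
def modify_json(path):
    # Load the current contents if the file exists, let the caller mutate
    # the dict, then write everything back.
    data = {}
    if os.path.exists(path):
        with open(path, encoding="utf-8") as fobj:
            data = json.load(fobj)
    yield data
    with open(path, "w", encoding="utf-8") as fobj:
        json.dump(data, fobj)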
Code example #8
    def _init_executors(self, to_run):
        from dvc.utils.fs import makedirs

        from .executor.local import TempDirExecutor

        executors = {}
        base_tmp_dir = os.path.join(self.repo.tmp_dir, self.EXEC_TMP_DIR)
        if not os.path.exists(base_tmp_dir):
            makedirs(base_tmp_dir)
        for stash_rev, item in to_run.items():
            self.scm.set_ref(EXEC_HEAD, item.rev)
            self.scm.set_ref(EXEC_MERGE, stash_rev)
            self.scm.set_ref(EXEC_BASELINE, item.baseline_rev)

            # Executor will be initialized with an empty git repo that
            # we populate by pushing:
            #   EXEC_HEAD - the base commit for this experiment
            #   EXEC_MERGE - the unmerged changes (from our stash)
            #       to be reproduced
            #   EXEC_BASELINE - the baseline commit for this experiment
            executor = TempDirExecutor(
                self.scm,
                self.dvc_dir,
                name=item.name,
                branch=item.branch,
                tmp_dir=base_tmp_dir,
                cache_dir=self.repo.odb.local.cache_dir,
            )
            executors[stash_rev] = executor

        for ref in (EXEC_HEAD, EXEC_MERGE, EXEC_BASELINE):
            self.scm.remove_ref(ref)

        return executors
Code example #9
File: analytics.py  Project: woodshop/dvc
def _find_or_create_user_id():
    """
    The user's ID is stored on a file under the global config directory.

    The file should contain a JSON with a "user_id" key:

        {"user_id": "16fd2706-8baf-433b-82eb-8c7fada847da"}

    IDs are generated randomly with UUID.
    """
    config_dir = Config.get_global_config_dir()
    fname = os.path.join(config_dir, "user_id")
    lockfile = os.path.join(config_dir, "user_id.lock")

    # Since the `fname` and `lockfile` are under the global config,
    # we need to make sure that directory already exists.
    makedirs(config_dir, exist_ok=True)

    try:
        with Lock(lockfile):
            try:
                with open(fname, "r") as fobj:
                    user_id = json.load(fobj)["user_id"]

            except (FileNotFoundError, ValueError, KeyError):
                user_id = str(uuid.uuid4())

                with open(fname, "w") as fobj:
                    json.dump({"user_id": user_id}, fobj)

            return user_id

    except LockError:
        logger.debug(
            "Failed to acquire '{lockfile}'".format(lockfile=lockfile))
Code example #10
File: template.py  Project: zang3tsu/dvc
    def __init__(self, dvc_dir):
        self.dvc_dir = dvc_dir

        if not os.path.exists(self.templates_dir):
            makedirs(self.templates_dir, exist_ok=True)
            for t in self.TEMPLATES:
                self.dump(t())
Code example #11
    def _get_repo_dirs(
        self,
        root_dir: str = None,
        fs: "FileSystem" = None,
        uninitialized: bool = False,
    ):
        from dvc.scm import SCM, Base, SCMError
        from dvc.utils.fs import makedirs

        dvc_dir = None
        tmp_dir = None
        try:
            root_dir = self.find_root(root_dir, fs)
            dvc_dir = os.path.join(root_dir, self.DVC_DIR)
            tmp_dir = os.path.join(dvc_dir, "tmp")
            makedirs(tmp_dir, exist_ok=True)
        except NotDvcRepoError:
            if not uninitialized:
                raise

            try:
                scm = SCM(root_dir or os.curdir)
            except SCMError:
                scm = SCM(os.curdir, no_scm=True)

            assert isinstance(scm, Base)
            root_dir = scm.root_dir

        return root_dir, dvc_dir, tmp_dir
Code example #12
    def _get_repo_dirs(
        self,
        root_dir: str = None,
        scm: "Base" = None,
        rev: str = None,
        uninitialized: bool = False,
    ):
        assert bool(scm) == bool(rev)

        from dvc.fs.scm import GitFileSystem
        from dvc.scm import SCM, Base, Git, SCMError
        from dvc.utils.fs import makedirs

        dvc_dir = None
        tmp_dir = None
        try:
            fs = (GitFileSystem(scm=scm, rev=rev)
                  if isinstance(scm, Git) and rev else None)
            root_dir = self.find_root(root_dir, fs)
            dvc_dir = os.path.join(root_dir, self.DVC_DIR)
            tmp_dir = os.path.join(dvc_dir, "tmp")
            makedirs(tmp_dir, exist_ok=True)
        except NotDvcRepoError:
            if not uninitialized:
                raise

            try:
                scm = SCM(root_dir or os.curdir)
            except SCMError:
                scm = SCM(os.curdir, no_scm=True)

            assert isinstance(scm, Base)
            root_dir = scm.root_dir

        return root_dir, dvc_dir, tmp_dir
Code example #13
File: test_fs.py  Project: growupboron/dvc
def test_makedirs(tmp_dir):
    path = os.path.join(tmp_dir, "directory")
    path_info = PathInfo(os.path.join(tmp_dir, "another", "directory"))

    makedirs(path)
    assert os.path.isdir(path)

    makedirs(path_info)
    assert os.path.isdir(path_info)
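
The test passes both a plain string and a PathInfo object. Assuming PathInfo implements __fspath__ (i.e. is os.PathLike), a wrapper only needs to normalize the argument once, for example (illustrative helper, not dvc's implementation):

import os

def ensure_dir(path, exist_ok=True):
    # os.fspath() turns any os.PathLike object (such as PathInfo) into a
    # plain string; recent os.makedirs would also accept it directly.
    os.makedirs(os.fspath(path), exist_ok=exist_ok)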
Code example #14
File: local.py  Project: rpatil524/dvc
    def put_file(
        self, from_file, to_info, callback=DEFAULT_CALLBACK, **kwargs
    ):
        makedirs(to_info.parent, exist_ok=True)
        tmp_file = tmp_fname(to_info)
        copyfile(from_file, tmp_file, callback=callback)
        os.replace(tmp_file, to_info)
Code example #15
    def _upload(
        self, from_file, to_info, name=None, no_progress_bar=False, **_kwargs
    ):
        makedirs(to_info.parent, exist_ok=True)

        tmp_file = tmp_fname(to_info)
        copyfile(
            from_file, tmp_file, name=name, no_progress_bar=no_progress_bar
        )
        os.rename(tmp_file, fspath_py35(to_info))
Code example #16
def test_get_to_dir(tmp_dir, erepo_dir, dname):
    with erepo_dir.chdir():
        erepo_dir.dvc_gen("file", "contents", commit="create file")

    makedirs(dname, exist_ok=True)

    Repo.get(fspath(erepo_dir), "file", dname)

    assert (tmp_dir / dname).is_dir()
    assert (tmp_dir / dname / "file").read_text() == "contents"
Code example #17
File: local.py  Project: nik123/dvc
    def put_file(
        self, from_file, to_info, callback=DEFAULT_CALLBACK, **kwargs
    ):
        parent = self.path.parent(to_info)
        makedirs(parent, exist_ok=True)
        tmp_file = self.path.join(parent, tmp_fname())
        copyfile(from_file, tmp_file, callback=callback)
        os.replace(tmp_file, to_info)
Code example #18
    def download(self, hdfs_path, local_path, **kwargs):
        from dvc.utils.fs import makedirs

        kwargs.setdefault("chunk_size", 2**16)

        makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, "wb") as writer:
            with self.read(hdfs_path, **kwargs) as reader:
                for chunk in reader:
                    writer.write(chunk)
Code example #19
    def make_tpi(self, name: str):
        from tpi import TPIError
        from tpi.terraform import TerraformBackend as TPIBackend

        try:
            working_dir = os.path.join(self.tmp_dir, name)
            makedirs(working_dir, exist_ok=True)
            yield TPIBackend(working_dir=working_dir)
        except TPIError as exc:
            raise DvcException("TPI operation failed") from exc
Code example #20
File: test_fs.py  Project: zeta1999/dvc
def test_makedirs_permissions(tmp_dir):
    dir_mode = 0o755
    intermediate_dir = "тестовая-директория"
    test_dir = os.path.join(intermediate_dir, "data")

    assert not os.path.exists(intermediate_dir)

    makedirs(test_dir, mode=dir_mode)

    assert stat.S_IMODE(os.stat(test_dir).st_mode) == dir_mode
    assert stat.S_IMODE(os.stat(intermediate_dir).st_mode) == dir_mode
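
The test expects the requested mode on the intermediate directory as well, which plain os.makedirs(mode=...) does not guarantee: intermediate directories are created with the default mode filtered by the umask. One way to get that behaviour, shown here as a sketch rather than dvc's actual implementation, is to temporarily adjust the umask:

import os

def makedirs_with_mode(path, mode):
    # Setting the umask to the complement of `mode` makes every directory
    # created by os.makedirs() (leaf and intermediate ones) end up with
    # exactly `mode`.
    umask = os.umask(0o777 - mode)
    try:
        os.makedirs(path, mode=mode, exist_ok=True)
    finally:
        os.umask(umask)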
Code example #21
File: base.py  Project: stjordanis/dvc
    def _download_file(self, from_info, to_info, name, no_progress_bar):
        makedirs(to_info.parent, exist_ok=True)

        logger.debug("Downloading '%s' to '%s'", from_info, to_info)
        name = name or to_info.name

        tmp_file = tmp_fname(to_info)

        self._download(  # noqa, pylint: disable=no-member
            from_info, tmp_file, name=name, no_progress_bar=no_progress_bar
        )

        move(tmp_file, to_info)
Code example #22
def test_import_to_dir(dname, tmp_dir, dvc, erepo_dir):
    makedirs(dname, exist_ok=True)

    with erepo_dir.chdir():
        erepo_dir.dvc_gen("foo", "foo content", commit="create foo")

    stage = dvc.imp(os.fspath(erepo_dir), "foo", dname)

    dst = os.path.join(dname, "foo")

    assert stage.outs[0].fspath == os.path.abspath(dst)
    assert os.path.isdir(dname)
    assert (tmp_dir / dst).read_text() == "foo content"
Code example #23
    def pack_repro_args(path, *args, tree=None, **kwargs):
        dpath = os.path.dirname(path)
        if tree:
            open_func = tree.open
            tree.makedirs(dpath)
        else:
            from dvc.utils.fs import makedirs

            open_func = open
            makedirs(dpath, exist_ok=True)
        data = {"args": args, "kwargs": kwargs}
        with open_func(path, "wb") as fobj:
            pickle.dump(data, fobj)
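
A counterpart that reads the pickled payload back could look like the sketch below (illustrative helper name; not necessarily the project's actual API):

import pickle

def unpack_repro_args_sketch(path, tree=None):
    # Mirror of the packing helper above: load the pickled payload and
    # return the positional and keyword arguments.
    open_func = tree.open if tree else open
    with open_func(path, "rb") as fobj:
        data = pickle.load(fobj)
    return data["args"], data["kwargs"]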
Code example #24
File: test_import_url.py  Project: rgvanwesep/dvc
def test_import_url_to_dir(dname, tmp_dir, dvc):
    tmp_dir.gen({"data_dir": {"file": "file content"}})
    src = os.path.join("data_dir", "file")

    makedirs(dname, exist_ok=True)

    stage = dvc.imp_url(src, dname)

    dst = tmp_dir / dname / "file"

    assert stage.outs[0].fs_path == os.fspath(dst)
    assert os.path.isdir(dname)
    assert dst.read_text() == "file content"
Code example #25
File: manager.py  Project: pared/dvc
    def __init__(
        self,
        scm: "Git",
        wdir: str,
    ):
        from dvc.utils.fs import makedirs

        self.scm = scm
        makedirs(wdir, exist_ok=True)
        self.wdir = wdir
        self.proc = ProcessManager(self.pid_dir)
        self._attached: Dict[str, "BaseExecutor"] = {}
        self._detached: Dict[str, "BaseExecutor"] = dict(self._load_infos())
        self._queue: Deque[Tuple[str, "BaseExecutor"]] = deque()
Code example #26
File: index.py  Project: efiop/dvc
    def __init__(
        self,
        tmp_dir: "StrPath",
        name: str,
    ):  # pylint: disable=super-init-not-called
        from diskcache import Index

        from dvc.fs.local import LocalFileSystem
        from dvc.utils.fs import makedirs

        self.index_dir = os.path.join(tmp_dir, self.INDEX_DIR, name)
        makedirs(self.index_dir, exist_ok=True)
        self.fs = LocalFileSystem()
        self.index = Index(self.index_dir)
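
diskcache.Index gives a persistent, dict-like store backed by the directory created above; a small usage illustration (the directory path and keys are made up):

from diskcache import Index

index = Index("/tmp/example-index")   # hypothetical directory
index["abc123.dir"] = ["entries", "would go here"]
assert "abc123.dir" in index
assert index["abc123.dir"] == ["entries", "would go here"]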
Code example #27
File: __init__.py  Project: pratikfalke/dvc
    def __init__(self, root_dir=None):
        from dvc.state import State
        from dvc.lock import make_lock
        from dvc.scm import SCM
        from dvc.cache import Cache
        from dvc.data_cloud import DataCloud
        from dvc.repo.metrics import Metrics
        from dvc.scm.tree import WorkingTree
        from dvc.repo.tag import Tag
        from dvc.utils.fs import makedirs

        root_dir = self.find_root(root_dir)

        self.root_dir = os.path.abspath(os.path.realpath(root_dir))
        self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

        self.config = Config(self.dvc_dir)

        self.scm = SCM(self.root_dir)

        self.tree = WorkingTree(self.root_dir)

        self.tmp_dir = os.path.join(self.dvc_dir, "tmp")
        makedirs(self.tmp_dir, exist_ok=True)

        hardlink_lock = self.config.config["core"].get("hardlink_lock", False)
        self.lock = make_lock(
            os.path.join(self.dvc_dir, "lock"),
            tmp_dir=os.path.join(self.dvc_dir, "tmp"),
            hardlink_lock=hardlink_lock,
            friendly=True,
        )

        # NOTE: storing state and link_state in the repository itself to avoid
        # any possible state corruption in 'shared cache dir' scenario.
        self.state = State(self, self.config.config)

        core = self.config.config[Config.SECTION_CORE]

        level = core.get(Config.SECTION_CORE_LOGLEVEL)
        if level:
            logger.setLevel(level.upper())

        self.cache = Cache(self)
        self.cloud = DataCloud(self)

        self.metrics = Metrics(self)
        self.tag = Tag(self)

        self._ignore()
Code example #28
File: template.py  Project: growupboron/dvc
    def dump(self):
        makedirs(self.plot_templates_dir, exist_ok=True)

        with open(
                os.path.join(self.plot_templates_dir,
                             self.TEMPLATE_NAME + self.EXTENSION),
                "w",
        ) as fobj:
            json.dump(
                self.DEFAULT_CONTENT,
                fobj,
                indent=self.INDENT,
                separators=self.SEPARATORS,
            )
Code example #29
def test_show_running_workspace(tmp_dir, scm, dvc, exp_stage, capsys):
    pid_dir = os.path.join(dvc.tmp_dir, EXEC_TMP_DIR, EXEC_PID_DIR)
    info = make_executor_info(location=BaseExecutor.DEFAULT_LOCATION)
    pidfile = os.path.join(
        pid_dir,
        "workspace",
        f"workspace{BaseExecutor.INFOFILE_EXT}",
    )
    makedirs(os.path.dirname(pidfile), True)
    (tmp_dir / pidfile).dump_json(info.asdict())

    assert dvc.experiments.show()["workspace"] == {
        "baseline": {
            "data": {
                "deps": {
                    "copy.py": {
                        "hash": ANY,
                        "size": ANY,
                        "nfiles": None,
                    }
                },
                "metrics": {
                    "metrics.yaml": {
                        "data": {
                            "foo": 1
                        }
                    }
                },
                "params": {
                    "params.yaml": {
                        "data": {
                            "foo": 1
                        }
                    }
                },
                "outs": {},
                "queued": False,
                "running": True,
                "executor": info.location,
                "timestamp": None,
            }
        }
    }

    capsys.readouterr()
    assert main(["exp", "show", "--csv"]) == 0
    cap = capsys.readouterr()
    assert "Running" in cap.out
    assert info.location in cap.out
Code example #30
File: __init__.py  Project: rogervaas/dvc
    def __init__(self, root_dir=None):
        from dvc.state import State
        from dvc.lock import make_lock
        from dvc.scm import SCM
        from dvc.cache import Cache
        from dvc.data_cloud import DataCloud
        from dvc.repo.metrics import Metrics
        from dvc.repo.params import Params
        from dvc.scm.tree import WorkingTree
        from dvc.utils.fs import makedirs
        from dvc.stage.cache import StageCache

        root_dir = self.find_root(root_dir)

        self.root_dir = os.path.abspath(os.path.realpath(root_dir))
        self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

        self.config = Config(self.dvc_dir)

        no_scm = self.config["core"].get("no_scm", False)
        self.scm = SCM(self.root_dir, no_scm=no_scm)

        self.tree = WorkingTree(self.root_dir)

        self.tmp_dir = os.path.join(self.dvc_dir, "tmp")
        self.index_dir = os.path.join(self.tmp_dir, "index")
        makedirs(self.index_dir, exist_ok=True)

        hardlink_lock = self.config["core"].get("hardlink_lock", False)
        self.lock = make_lock(
            os.path.join(self.tmp_dir, "lock"),
            tmp_dir=self.tmp_dir,
            hardlink_lock=hardlink_lock,
            friendly=True,
        )

        # NOTE: storing state and link_state in the repository itself to avoid
        # any possible state corruption in 'shared cache dir' scenario.
        self.state = State(self)

        self.cache = Cache(self)
        self.cloud = DataCloud(self)

        self.stage_cache = StageCache(self.cache.local.cache_dir)

        self.metrics = Metrics(self)
        self.params = Params(self)

        self._ignore()