Example #1
0
def test_path_isin_accepts_pathinfo():
    """``path_isin`` can be called with a ``PathInfo`` as one of its arguments."""
    folder = os.path.join("path", "to", "folder")
    one_level_up = PathInfo(folder) / ".."

    folder_is_inside = path_isin(folder, one_level_up)
    assert folder_is_inside

    # The arguments are swapped on purpose to exercise the negative case.
    # pylint: disable=arguments-out-of-order
    parent_is_inside = path_isin(one_level_up, folder)
    assert not parent_is_inside
Example #2
0
def test_path_isin_positive():
    """A nested path is inside each of its ancestors, with or without a trailing separator."""
    nested = os.path.join("path", "to", "folder")

    # Joining with "" appends a trailing separator to the ancestor.
    ancestors = (
        os.path.join("path", "to", ""),
        os.path.join("path", "to"),
        os.path.join("path", ""),
        os.path.join("path"),
    )

    for ancestor in ancestors:
        assert path_isin(nested, ancestor)
Example #3
0
def test_path_isin_on_same_path():
    """A path is never reported as being inside itself, trailing separator or not."""
    folder = os.path.join("path", "to", "folder")
    folder_with_sep = os.path.join(folder, "")

    # Every (child, parent) combination of the two spellings of the same path.
    same_path_pairs = [
        (folder, folder),
        (folder, folder_with_sep),
        (folder_with_sep, folder),
        (folder_with_sep, folder_with_sep),
    ]

    for child, parent in same_path_pairs:
        assert not path_isin(child, parent)
Example #4
0
    def collect(self, target, with_deps=False, recursive=False, graph=None):
        """Return the list of stages selected by ``target``.

        Args:
            target: path of a stage file or, together with ``recursive``,
                of a directory. When falsy, ``get_stages`` of the graph is
                returned.
            with_deps: when true, return the stages met on a depth-first
                post-order walk of the target's pipeline, started at the
                target's node, instead of only the target stage.
            recursive: when true and ``target`` is an existing directory,
                return the stages whose path is inside that directory.
            graph: graph used instead of ``self.graph`` when truthy.

        Returns:
            list: the selected stages.
        """
        import networkx as nx
        from dvc.stage import Stage

        G = graph or self.graph

        if not target:
            return get_stages(G)

        target = os.path.abspath(target)

        if recursive and os.path.isdir(target):
            stage_by_node = nx.get_node_attributes(G, "stage")
            # Materialize the walk first, then map every node to its stage.
            visit_order = list(nx.dfs_postorder_nodes(G))
            candidates = (stage_by_node[node] for node in visit_order)
            return [
                candidate
                for candidate in candidates
                if path_isin(candidate.path, target)
            ]

        stage = Stage.load(self, target)
        if with_deps:
            # Graph nodes are keyed by the stage path relative to the root.
            start = relpath(stage.path, self.root_dir)
            pipeline = get_pipeline(get_pipelines(G), start)
            return [
                pipeline.node[visited]["stage"]
                for visited in nx.dfs_postorder_nodes(pipeline, start)
            ]

        return [stage]
Example #5
0
    def __init__(self, stage, path, *args, **kwargs):
        """Initialize the object, storing in-repo paths relative to ``stage.wdir``.

        Args:
            stage: owning stage; may be falsy, in which case ``path`` is
                passed on unchanged.
            path: path handed to the parent initializer.
            *args, **kwargs: forwarded untouched to the parent initializer.
        """
        if stage:
            if path_isin(path, stage.repo.root_dir):
                path = relpath(path, stage.wdir)

        super().__init__(stage, path, *args, **kwargs)

        if not (self.is_in_repo and self.repo):
            return

        # Reuse the repo's tree only when it is a working tree.
        if is_working_tree(self.repo.tree):
            self.tree = self.repo.tree
Example #6
0
    def _ignore(self):
        """Pass the paths that must stay out of version control to ``scm.ignore_list``.

        The list always holds the ``"local"`` entry of ``config.files`` and
        ``tmp_dir``; the local cache dir is added only when it is inside
        ``root_dir``.
        """
        ignored = [self.config.files["local"], self.tmp_dir]

        if path_isin(self.cache.local.cache_dir, self.root_dir):
            ignored.append(self.cache.local.cache_dir)

        self.scm.ignore_list(ignored)
Example #7
0
def imp_url(self, url, out=None, fname=None, erepo=None, locked=True):
    """Create, run and dump a stage that has ``url`` as dependency and ``out`` as output.

    Args:
        url: location to import; when it exists locally inside ``root_dir``
            it is rewritten relative to the resolved working directory.
        out: output path, resolved through ``resolve_output``.
        fname: stage file name; the path from ``resolve_paths`` is used
            when falsy.
        erepo: forwarded to ``Stage.create``.
        locked: value assigned to ``stage.locked`` after the run.

    Returns:
        The created stage, or ``None`` when ``Stage.create`` returns ``None``.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage

    out = resolve_output(url, out)
    default_path, wdir, out = resolve_paths(self, out)

    # NOTE: when the user is importing something from within their own
    # repository, the dependency is recorded relative to wdir.
    if os.path.exists(url):
        if path_isin(os.path.abspath(url), self.root_dir):
            url = relpath(url, wdir)

    stage = Stage.create(
        self,
        fname or default_path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    if stage is None:
        return None

    stage_file = Dvcfile(self, stage.path)
    stage_file.overwrite_with_prompt(force=True)

    self.check_modified_graph([stage])
    stage.run()

    # The lock flag is set only after the run, right before dumping.
    stage.locked = locked
    stage_file.dump(stage)

    return stage
Example #8
0
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    desc=None,
    jobs=None,
):
    """Create a stage with ``url`` as dependency and ``out`` as output, then dump it.

    Args:
        url: location to import; when ``erepo`` is ``None`` and the url
            exists locally inside ``root_dir`` it is rewritten relative to
            the resolved working directory.
        out: output path, resolved through ``resolve_output``.
        fname: stage file name; the path from ``resolve_paths`` is used
            when falsy.
        erepo: forwarded to ``create_stage``.
        frozen: value assigned to ``stage.frozen`` before dumping.
        no_exec: when true, the stage is not run and ``stage.ignore_outs()``
            is called instead.
        desc: when truthy, stored as ``desc`` of the first output.
        jobs: forwarded to ``stage.run``.

    Returns:
        The created stage, or ``None`` when ``stage.can_be_skipped`` is true.

    Raises:
        OutputDuplicationError: re-raised from the graph check with the new
            stage removed from the reported stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    default_path, wdir, out = resolve_paths(self, out)

    # NOTE: when user is importing something from within their own repository
    url_is_inside_own_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if url_is_inside_own_repo:
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or default_path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    restore_meta(stage)
    if stage.can_be_skipped:
        return None

    if desc:
        stage.outs[0].desc = desc

    stage_file = Dvcfile(self, stage.path)
    stage_file.remove()

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the other stages that produce the same output.
        raise OutputDuplicationError(exc.output, set(exc.stages) - {stage})

    if not no_exec:
        stage.run(jobs=jobs)
    else:
        stage.ignore_outs()

    stage.frozen = frozen
    stage_file.dump(stage)

    return stage
Example #9
0
    def _ignore(self):
        """Call ``scm_context.ignore`` for every path that must stay out of version control.

        The paths are the ``"local"`` entry of ``config.files``, ``tmp_dir``
        and, only when it is inside ``root_dir``, the local cache dir.
        """
        to_ignore = [self.config.files["local"], self.tmp_dir]

        cache_is_inside_repo = path_isin(
            self.odb.local.cache_dir, self.root_dir
        )
        if cache_is_inside_repo:
            to_ignore.append(self.odb.local.cache_dir)

        for entry in to_ignore:
            self.scm_context.ignore(entry)
Example #10
0
    def __init__(self, stage, path, *args, **kwargs):
        """Initialize the object, storing in-repo paths relative to ``stage.wdir``.

        Args:
            stage: owning stage; may be falsy, in which case ``path`` is
                passed on unchanged.
            path: path handed to the parent initializer.
            *args, **kwargs: forwarded untouched to the parent initializer.
        """
        if stage:
            if path_isin(path, stage.repo.root_dir):
                path = relpath(path, stage.wdir)

        super().__init__(stage, path, *args, **kwargs)

        if not (self.is_in_repo and self.repo):
            return

        # Reuse the repo's filesystem only when it is a LocalFileSystem.
        if isinstance(self.repo.fs, LocalFileSystem):
            self.fs = self.repo.fs
Example #11
0
    def __str__(self):
        """Return the path as text.

        Objects that are not in the repo are shown through ``def_path``.
        Otherwise ``path_info`` is shown relative to the current working
        directory when that directory is inside the repo root, and relative
        to the repo root in every other case.
        """
        if not self.is_in_repo:
            return str(self.def_path)

        cwd = os.getcwd()
        if not path_isin(cwd, self.repo.root_dir):
            return relpath(self.path_info, self.repo.root_dir)

        return relpath(self.path_info, cwd)
Example #12
0
    def is_in_repo(self):
        """Tell whether this path is defined relatively and located inside the repo.

        Returns:
            bool: ``False`` when ``def_path`` uses the ``remote`` scheme,
            when ``def_path`` is absolute, or when there is no repo;
            otherwise whether the resolved ``path_info`` is inside
            ``repo.root_dir``.
        """
        if urlparse(self.def_path).scheme == "remote":
            return False

        if os.path.isabs(self.def_path):
            return False

        # ``self.repo and ...`` would hand back the falsy repo object itself
        # (e.g. ``None``); a predicate should answer with a real boolean.
        if not self.repo:
            return False

        return bool(
            path_isin(os.path.realpath(self.path_info), self.repo.root_dir)
        )
Example #13
0
    def __str__(self):
        """Return the path as text.

        ``def_path`` is shown as is when there is no repo, when it uses the
        ``remote`` scheme or when it is absolute. Otherwise ``path_info`` is
        shown relative to the current working directory when that directory
        is inside the repo root, and relative to the repo root in every
        other case.
        """
        shown_as_defined = (
            not self.repo
            or urlparse(self.def_path).scheme == "remote"
            or os.path.isabs(self.def_path)
        )
        if shown_as_defined:
            return str(self.def_path)

        cwd = os.getcwd()
        if not path_isin(cwd, self.repo.root_dir):
            return relpath(self.path_info, self.repo.root_dir)

        return relpath(self.path_info, cwd)
Example #14
0
def test_windows_should_add_when_cache_on_different_drive(
        tmp_dir, dvc, temporary_windows_drive):
    """Adding a file works when the cache dir is configured on another drive.

    The cache dir is pointed at ``temporary_windows_drive``; after
    generating ``file`` the cached copy must be inside that drive, be a
    regular file and have the same content as the workspace file.
    """
    dvc.config["cache"]["dir"] = temporary_windows_drive
    dvc.cache = Cache(dvc)

    (stage, ) = tmp_dir.dvc_gen({"file": "file"})
    cache_path = stage.outs[0].cache_path

    assert path_isin(cache_path, temporary_windows_drive)
    assert os.path.isfile(cache_path)
    # The comparison result has to be asserted, otherwise a content mismatch
    # goes unnoticed. shallow=False compares the bytes instead of trusting
    # matching os.stat() signatures.
    assert filecmp.cmp("file", cache_path, shallow=False)
Example #15
0
    def _ignore(self):
        """Pass the paths that must stay out of version control to ``scm.ignore_list``.

        The list holds the local config file, the updater file, the state,
        lock and updater-lock files and, only when it is inside
        ``root_dir``, the local cache dir.
        """
        from dvc.updater import Updater

        updater = Updater(self.dvc_dir)

        # Plain list concatenation, evaluated left to right.
        ignored = [self.config.config_local_file, updater.updater_file]
        ignored = ignored + self.state.files + self.lock.files
        ignored = ignored + updater.lock.files

        if path_isin(self.cache.local.cache_dir, self.root_dir):
            ignored.append(self.cache.local.cache_dir)

        self.scm.ignore_list(ignored)
Example #16
0
    def _ignore(self):
        """Pass the paths that must stay out of version control to ``scm.ignore_list``.

        The list always holds the ``"local"`` entry of ``config.files`` and
        ``tmp_dir``. ``experiments.exp_dir`` is added when ``experiments``
        is truthy, and the local cache dir when it is inside ``root_dir``.
        """
        ignored = [self.config.files["local"], self.tmp_dir]

        if self.experiments:
            ignored += [self.experiments.exp_dir]

        if path_isin(self.cache.local.cache_dir, self.root_dir):
            ignored.append(self.cache.local.cache_dir)

        self.scm.ignore_list(ignored)
Example #17
0
    def _check_stage_path(repo, path):
        """Validate that ``path`` is an existing directory located in the repository.

        Args:
            repo: object whose ``root_dir`` is the repository root; must not
                be ``None``.
            path: directory to validate; symlinks are resolved first.

        Raises:
            StagePathNotFoundError: the resolved path does not exist.
            StagePathNotDirectoryError: the resolved path is not a directory.
            StagePathOutsideError: the resolved path is neither the resolved
                repository root nor inside it.
        """
        assert repo is not None

        resolved = os.path.realpath(path)
        if not os.path.exists(resolved):
            raise StagePathNotFoundError(path)
        if not os.path.isdir(resolved):
            raise StagePathNotDirectoryError(path)

        resolved_root = os.path.realpath(repo.root_dir)
        # The root itself is accepted explicitly before the containment check.
        if resolved == resolved_root or path_isin(resolved, resolved_root):
            return

        raise StagePathOutsideError(path)
Example #18
0
    def _get_gitignore(self, path):
        """Build the ignore entry for ``path`` and locate the ignore file to put it in.

        Args:
            path: absolute path to be ignored.

        Returns:
            tuple: ``(entry, gitignore)`` where ``entry`` is ``path``
            relative to its parent directory, written with ``/`` separators
            and a leading ``/``, and ``gitignore`` is the ``GITIGNORE`` file
            in that parent directory.

        Raises:
            FileNotInRepoError: the resolved ignore file is not inside
                ``root_dir``.
        """
        parent_dir = os.path.dirname(path)

        assert os.path.isabs(path)
        assert os.path.isabs(parent_dir)

        entry = relpath(path, parent_dir).replace(os.sep, "/")
        # NOTE: using '/' prefix to make path unambiguous
        if entry and not entry.startswith("/"):
            entry = "/" + entry

        gitignore = os.path.join(parent_dir, self.GITIGNORE)

        resolved_gitignore = os.path.realpath(gitignore)
        if path_isin(resolved_gitignore, self.root_dir):
            return entry, gitignore

        raise FileNotInRepoError(path)
Example #19
0
File: utils.py Project: dapivei/dvc
def check_stage_path(repo, path, is_wdir=False):
    """Validate that ``path`` is an existing directory located in the repository.

    Args:
        repo: object whose ``root_dir`` is the repository root; must not be
            ``None``.
        path: directory to validate; symlinks are resolved first.
        is_wdir: when true, error messages call ``path`` a
            "stage working dir" instead of a "file path".

    Raises:
        StagePathNotFoundError: the resolved path does not exist.
        StagePathNotDirectoryError: the resolved path is not a directory.
        StagePathOutsideError: the resolved path is neither the resolved
            repository root nor inside it.
    """
    assert repo is not None

    subject = "stage working dir" if is_wdir else "file path"

    def describe(problem):
        # Format in a single pass. Formatting a template that already
        # contains ``path`` a second time would treat braces in the path as
        # replacement fields and fail with KeyError/IndexError/ValueError
        # instead of the intended error.
        return "{} '{}' {}".format(subject, path, problem)

    real_path = os.path.realpath(path)
    if not os.path.exists(real_path):
        raise StagePathNotFoundError(describe("does not exist"))

    if not os.path.isdir(real_path):
        raise StagePathNotDirectoryError(describe("is not directory"))

    proj_dir = os.path.realpath(repo.root_dir)
    # The root itself is accepted explicitly before the containment check.
    if real_path != proj_dir and not path_isin(real_path, proj_dir):
        raise StagePathOutsideError(describe("is outside of DVC repo"))
Example #20
0
def imp_url(self, url, out=None, fname=None, erepo=None, frozen=True):
    """Create, run and dump a stage that has ``url`` as dependency and ``out`` as output.

    Args:
        url: location to import; when ``erepo`` is ``None`` and the url
            exists locally inside ``root_dir`` it is rewritten relative to
            the resolved working directory.
        out: output path, resolved through ``resolve_output``.
        fname: stage file name; the path from ``resolve_paths`` is used
            when falsy.
        erepo: forwarded to ``create_stage``.
        frozen: value assigned to ``stage.frozen`` after the run.

    Returns:
        The created stage, or ``None`` when ``create_stage`` returns ``None``.

    Raises:
        OutputDuplicationError: re-raised from the graph check with the new
            stage removed from the reported stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage

    out = resolve_output(url, out)
    default_path, wdir, out = resolve_paths(self, out)

    # NOTE: when user is importing something from within their own repository
    url_is_inside_own_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if url_is_inside_own_repo:
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or default_path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    if stage is None:
        return None

    stage_file = Dvcfile(self, stage.path)
    stage_file.remove_with_prompt(force=True)

    try:
        self.check_modified_graph([stage])
    except OutputDuplicationError as exc:
        # Report only the other stages that produce the same output.
        raise OutputDuplicationError(exc.output, set(exc.stages) - {stage})

    stage.run()

    # The frozen flag is set only after the run, right before dumping.
    stage.frozen = frozen
    stage_file.dump(stage)

    return stage
Example #21
0
def test_external_repo(erepo):
    """Opening one external repo several times reads the right revision and clones once."""
    url = erepo.root_dir
    # We will share cache dir, to fetch version file
    shared_cache = erepo.dvc.cache.local.cache_dir

    # (extra keyword arguments, expected content of the "version" file)
    checkouts = (
        ({}, "master"),
        ({"rev": "branch"}, "branch"),
    )

    with patch.object(Git, "clone", wraps=Git.clone) as clone_mock:
        for extra_kwargs, expected in checkouts:
            with external_repo(
                url, cache_dir=shared_cache, **extra_kwargs
            ) as repo:
                version_file = os.path.join(repo.root_dir, "version")
                with repo.open(version_file) as fd:
                    assert fd.read() == expected

        # Check cache_dir is unset
        with external_repo(url) as repo:
            assert path_isin(repo.cache.local.cache_dir, repo.root_dir)

        assert clone_mock.call_count == 1
Example #22
0
    def collect(self, target, with_deps=False, recursive=False, graph=None):
        """Return the list of stages selected by ``target``.

        Args:
            target: path of a stage file or, together with ``recursive``,
                of a directory. When falsy, every node of ``graph`` is
                returned, or ``self.stages`` when ``graph`` is falsy.
            with_deps: when true, return the stages met on a depth-first
                post-order walk of the target's pipeline, started at the
                target stage, instead of only the target stage.
            recursive: when true and ``target`` is an existing directory,
                return the stages whose path is inside that directory.
            graph: graph used instead of ``self.graph`` when truthy.

        Returns:
            list: the selected stages.
        """
        import networkx as nx
        from dvc.stage import Stage

        if not target:
            if graph:
                return list(graph)
            return self.stages

        target = os.path.abspath(target)

        if recursive and os.path.isdir(target):
            inside = []
            for candidate in nx.dfs_postorder_nodes(graph or self.graph):
                if path_isin(candidate.path, target):
                    inside.append(candidate)
            return inside

        stage = Stage.load(self, target)

        # Optimization: do not collect the graph for a specific target
        if with_deps:
            pipelines = get_pipelines(graph or self.graph)
            pipeline = get_pipeline(pipelines, stage)
            return list(nx.dfs_postorder_nodes(pipeline, stage))

        return [stage]
Example #23
0
    def collect(self, target, with_deps=False, recursive=False, graph=None):
        """Return the list of stages selected by ``target``.

        Args:
            target: path of a stage file or, together with ``recursive``,
                of a directory. When falsy, every node of the graph is
                returned.
            with_deps: when true, return the stages met on a depth-first
                post-order walk of the target's pipeline, started at the
                target stage, instead of only the target stage.
            recursive: when true and ``target`` is an existing directory,
                return the stages whose path is inside that directory.
            graph: graph used instead of ``self.graph`` when truthy.

        Returns:
            list: the selected stages.
        """
        import networkx as nx
        from dvc.stage import Stage

        G = graph or self.graph

        if not target:
            return list(G)

        target = os.path.abspath(target)

        if recursive and os.path.isdir(target):

            def lies_inside_target(candidate):
                return path_isin(candidate.path, target)

            return list(filter(lies_inside_target, nx.dfs_postorder_nodes(G)))

        stage = Stage.load(self, target)
        if with_deps:
            pipeline = get_pipeline(get_pipelines(G), stage)
            return list(nx.dfs_postorder_nodes(pipeline, stage))

        return [stage]
Example #24
0
def test_external_repo(erepo_dir):
    """Opening one external repo several times reads the right revision and clones once."""
    with erepo_dir.chdir():
        with erepo_dir.branch("branch", new=True):
            erepo_dir.dvc_gen("file", "branch", commit="create file on branch")
        erepo_dir.dvc_gen("file", "master", commit="create file on master")

    url = fspath(erepo_dir)
    # We will share cache dir, to fetch version file
    shared_cache = erepo_dir.dvc.cache.local.cache_dir

    # (extra keyword arguments, expected content of "file")
    checkouts = (
        ({}, "master"),
        ({"rev": "branch"}, "branch"),
    )

    with patch.object(Git, "clone", wraps=Git.clone) as clone_mock:
        for extra_kwargs, expected in checkouts:
            with external_repo(
                url, cache_dir=shared_cache, **extra_kwargs
            ) as repo:
                tracked_file = os.path.join(repo.root_dir, "file")
                with repo.open(tracked_file) as fd:
                    assert fd.read() == expected

        # Check cache_dir is unset
        with external_repo(url) as repo:
            assert path_isin(repo.cache.local.cache_dir, repo.root_dir)

        assert clone_mock.call_count == 1
Example #25
0
def imp_url(
    self,
    url,
    out=None,
    fname=None,
    erepo=None,
    frozen=True,
    no_exec=False,
    remote=None,
    to_remote=False,
    desc=None,
    jobs=None,
):
    """Create a stage with ``url`` as dependency and ``out`` as output, then dump it.

    Args:
        url: location to import; when ``erepo`` is ``None`` and the url
            exists locally inside ``root_dir`` it is rewritten relative to
            the resolved working directory.
        out: output path, resolved through ``resolve_output``.
        fname: stage file name; the path from ``resolve_paths`` is used
            when falsy.
        erepo: forwarded to ``create_stage``.
        frozen: value assigned to ``stage.frozen`` before dumping.
        no_exec: when true, the stage is not run and ``stage.ignore_outs()``
            is called instead.
        remote: remote name passed to ``cloud.get_remote_odb``; only valid
            together with ``to_remote``.
        to_remote: when true, the first output is transferred to the remote
            object database instead of running the stage.
        desc: when truthy, stored as ``desc`` of the first output.
        jobs: forwarded to ``stage.run`` or to the output transfer.

    Returns:
        The created stage.

    Raises:
        InvalidArgumentError: ``to_remote`` is combined with ``no_exec``, or
            ``remote`` is given without ``to_remote``.
        OutputDuplicationError: re-raised from the graph check with the new
            stage removed from the reported stages.
    """
    from dvc.dvcfile import Dvcfile
    from dvc.stage import Stage, create_stage, restore_meta

    out = resolve_output(url, out)
    always_local = to_remote and not out
    default_path, wdir, out = resolve_paths(
        self, out, always_local=always_local
    )

    if to_remote and no_exec:
        raise InvalidArgumentError(
            "--no-exec can't be combined with --to-remote")

    if remote and not to_remote:
        raise InvalidArgumentError(
            "--remote can't be used without --to-remote")

    # NOTE: when user is importing something from within their own repository
    url_is_inside_own_repo = (
        erepo is None
        and os.path.exists(url)
        and path_isin(os.path.abspath(url), self.root_dir)
    )
    if url_is_inside_own_repo:
        url = relpath(url, wdir)

    stage = create_stage(
        Stage,
        self,
        fname or default_path,
        wdir=wdir,
        deps=[url],
        outs=[out],
        erepo=erepo,
    )
    restore_meta(stage)

    if desc:
        stage.outs[0].desc = desc

    stage_file = Dvcfile(self, stage.path)
    stage_file.remove()

    try:
        candidate_index = self.index.add(stage)
        candidate_index.check_graph()
    except OutputDuplicationError as exc:
        # Report only the other stages that produce the same output.
        raise OutputDuplicationError(exc.output, set(exc.stages) - {stage})

    if no_exec:
        stage.ignore_outs()
    elif to_remote:
        # Transfer to the remote instead of running the stage, then record
        # the dependencies and the stage checksum by hand.
        remote_odb = self.cloud.get_remote_odb(remote, "import-url")
        stage.outs[0].transfer(url, odb=remote_odb, jobs=jobs)
        stage.save_deps()
        stage.md5 = stage.compute_md5()
    else:
        stage.run(jobs=jobs)

    stage.frozen = frozen
    stage_file.dump(stage)

    return stage
Example #26
0
    def __init__(self, stage, path, *args, **kwargs):
        """Initialize the object, storing in-repo paths relative to ``stage.wdir``.

        Args:
            stage: owning stage; may be falsy, in which case ``path`` is
                passed on unchanged.
            path: path handed to the parent initializer.
            *args, **kwargs: forwarded untouched to the parent initializer.
        """
        path_is_inside_repo = stage and path_isin(path, stage.repo.root_dir)
        if path_is_inside_repo:
            path = relpath(path, stage.wdir)

        super().__init__(stage, path, *args, **kwargs)
Example #27
0
    def _collect_inside(self, path, graph):
        """Return the stages of ``graph`` whose path is inside ``path``.

        The stages are listed in depth-first post-order of ``graph``.
        """
        import networkx as nx

        def lies_inside(stage):
            return path_isin(stage.path, path)

        return list(filter(lies_inside, nx.dfs_postorder_nodes(graph)))
Example #28
0
def test_path_isin_on_common_substring_path():
    """A path that only shares a name prefix with another path is not inside it."""
    longer_name = os.path.join("path", "to", "folder1")
    shorter_name = os.path.join("path", "to", "folder")

    # "folder1" starts with "folder" as a string, but is a sibling of it.
    is_inside = path_isin(longer_name, shorter_name)
    assert not is_inside
Example #29
0
def test_path_isin_accepts_pathinfo():
    """``path_isin`` can be called with a ``PathInfo`` as one of its arguments."""
    nested = os.path.join("path", "to", "folder")
    above = PathInfo(nested) / ".."

    nested_is_inside = path_isin(nested, above)
    assert nested_is_inside

    # Swapping the arguments must give the opposite answer.
    above_is_inside = path_isin(above, nested)
    assert not above_is_inside
Example #30
0
def test_path_isin_with_absolute_path():
    """A path built below an absolute directory is inside that directory."""
    absolute_root = os.path.abspath("path")
    nested = os.path.join(absolute_root, "to", "folder")

    is_inside = path_isin(nested, absolute_root)
    assert is_inside