Esempio n. 1
0
File: base.py Progetto: ivalearn/dvc
    def _repro_dvc(cls, dvc_dir: Optional[str], rel_cwd: Optional[str]):
        """Yield an open ``Repo`` for a repro run and clean up afterwards.

        This is a generator (presumably wrapped as a context manager by a
        decorator outside this view). It opens ``Repo(dvc_dir)`` and, when
        ``dvc_dir`` is given, changes the process working directory to the
        repo root (or to ``rel_cwd`` below it) until the generator finishes.

        Args:
            dvc_dir: Path handed to ``Repo``. When ``None`` the working
                directory is left untouched.
            rel_cwd: Optional directory, relative to the repo root, to run in.

        Yields:
            The opened ``Repo``.
        """
        from dvc.repo import Repo

        dvc = Repo(dvc_dir)
        if cls.QUIET:
            dvc.scm.quiet = cls.QUIET

        # The previous cwd is only recorded when we actually move away from it.
        old_cwd: Optional[str] = None
        if dvc_dir is not None:
            old_cwd = os.getcwd()
            run_dir = (
                os.path.join(dvc.root_dir, rel_cwd) if rel_cwd else dvc.root_dir
            )
            os.chdir(run_dir)
        logger.debug("Running repro in '%s'", os.getcwd())

        try:
            yield dvc
        except CheckpointKilledError:
            # Propagated as-is, without logging.
            raise
        except DvcException:
            logger.exception("")
            raise
        except Exception:
            logger.exception("unexpected error")
            raise
        finally:
            dvc.close()
            if old_cwd:
                os.chdir(old_cwd)
Esempio n. 2
0
def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
    """Yield a ``Repo`` opened on a prepared checkout of an external repo.

    This is a generator (presumably wrapped as a context manager by a
    decorator outside this view).

    Args:
        url: Location of the external repository.
        rev: Revision to use when ``rev_lock`` is not given.
        rev_lock: Revision that takes precedence over ``rev`` when truthy.
        cache_dir: Cache directory forwarded to ``_external_repo``.

    Yields:
        The opened ``Repo``.
    """
    from dvc.repo import Repo

    path = _external_repo(url=url, rev=rev_lock or rev, cache_dir=cache_dir)
    repo = Repo(path)
    try:
        yield repo
    finally:
        # An exception raised by the consumer is re-raised at the ``yield``;
        # without ``finally`` the repo would never be closed in that case.
        repo.close()
Esempio n. 3
0
def test_branch_config(tmp_dir, scm):
    """Check that ``Repo(rev=...)`` reads the config committed on that rev.

    DVC is initialised and a remote is configured on ``branch`` only, so
    opening ``master`` must fail while opening ``branch`` exposes the remote.
    """
    tmp_dir.scm_gen("foo", "foo", commit="init")

    # sanity check
    with pytest.raises(NotDvcRepoError):
        Repo().close()

    # Initialise DVC on a separate branch and register a remote there.
    scm.checkout("branch", create_new=True)
    branch_repo = Repo.init()
    with branch_repo.config.edit() as edited:
        edited["remote"]["branch"] = {"url": "/some/path"}
    branch_repo.close()

    config_file = os.path.join(".dvc", "config")
    scm.add([config_file])
    scm.commit("init dvc")
    scm.checkout("master")

    # The DVC config was committed on "branch" only.
    with pytest.raises(NotDvcRepoError):
        Repo(rev="master").close()

    at_branch = Repo(rev="branch")
    try:
        remote_url = at_branch.config["remote"]["branch"]["url"]
        assert remote_url == "/some/path"
    finally:
        at_branch.close()
Esempio n. 4
0
    def _repro_dvc(
        cls,
        info: "ExecutorInfo",
        log_errors: bool = True,
        **kwargs,
    ):
        """Yield an open ``Repo`` for a repro run described by ``info``.

        This is a generator (presumably wrapped as a context manager by a
        decorator outside this view). The working directory is switched to
        ``info.wdir`` under the SCM root when set, otherwise to the DVC root,
        and is always restored once the generator finishes.

        Args:
            info: Executor description; ``root_dir``, ``dvc_dir`` and ``wdir``
                are read from it.
            log_errors: When true, exceptions other than
                ``CheckpointKilledError`` are logged before being re-raised.
            **kwargs: Accepted but not used in this body.

        Yields:
            The opened ``Repo``.
        """
        from dvc.repo import Repo
        from dvc.stage.monitor import CheckpointKilledError

        dvc = Repo(os.path.join(info.root_dir, info.dvc_dir))
        if cls.QUIET:
            dvc.scm_context.quiet = cls.QUIET

        old_cwd = os.getcwd()
        run_dir = (
            os.path.join(dvc.scm.root_dir, info.wdir)
            if info.wdir
            else dvc.root_dir
        )
        os.chdir(run_dir)

        try:
            logger.debug("Running repro in '%s'", os.getcwd())
            yield dvc
        except CheckpointKilledError:
            # Propagated as-is, never logged.
            raise
        except DvcException:
            if log_errors:
                logger.exception("")
            raise
        except Exception:
            if log_errors:
                logger.exception("unexpected error")
            raise
        finally:
            dvc.close()
            os.chdir(old_cwd)
Esempio n. 5
0
def _get_remote_config(url):
    """Build the remote-related config to reuse from the repo at ``url``.

    Returns:
        ``{}`` when ``url`` is not a DVC repo. Otherwise, if the repo names a
        default remote, a dict carrying that remote's config section. If it
        does not, a dict that defines a default remote called
        ``"auto-generated-upstream"`` pointing at the repo's local cache dir.
    """
    try:
        repo = Repo(url)
    except NotDvcRepoError:
        return {}

    try:
        default_remote = repo.config["core"].get("remote")
        if default_remote:
            # Use original remote to make sure that we are using correct url,
            # credential paths, etc if they are relative to the config
            # location.
            remote_section = repo.config["remote"][default_remote]
            return {"remote": {default_remote: remote_section}}

        # Fill the empty upstream entry with a new remote pointing to the
        # original repo's cache location.
        upstream = "auto-generated-upstream"
        return {
            "core": {"remote": upstream},
            "remote": {upstream: {"url": repo.cache.local.cache_dir}},
        }
    finally:
        repo.close()
Esempio n. 6
0
def external_repo(
    url,
    rev=None,
    for_write=False,
    cache_dir=None,
    cache_types=None,
    **kwargs,
):
    """Yield a ``Repo`` built on a cached clone of ``url``.

    This is a generator (presumably wrapped as a context manager by a
    decorator outside this view). Errors raised by the consumer are
    translated into their external-repo counterparts, and the repo is always
    closed. When ``for_write`` is set, the clone is also removed at the end.

    Args:
        url: Location of the external repository.
        rev: Revision to open; defaults to ``refs/remotes/origin/HEAD``.
        for_write: When true, the repo is opened without ``scm``/``rev`` on
            the clone path itself and that path is removed afterwards.
        cache_dir: Cache directory; falls back to ``_get_cache_dir(url)``.
        cache_types: Value stored under the cache ``"type"`` config key.
        **kwargs: Extra keyword arguments forwarded to ``Repo``.

    Yields:
        The opened ``Repo``.

    Raises:
        NoRemoteInExternalRepoError: On ``NoRemoteError``.
        NoOutputInExternalRepoError: On ``OutputNotFoundError`` whose
            ``repo`` attribute is this repo.
        PathMissingError: On ``FileMissingError``.
    """
    from dvc.config import NoRemoteError
    from dvc.scm.git import Git

    logger.debug("Creating external repo %s@%s", url, rev)
    path = _cached_clone(url, rev, for_write=for_write)
    # Local HEAD points to the tip of whatever branch we first cloned from
    # (which may not be the default branch), use origin/HEAD here to get
    # the tip of the default branch
    if not rev:
        rev = "refs/remotes/origin/HEAD"

    cache_section = {
        "dir": cache_dir or _get_cache_dir(url),
        "type": cache_types,
    }
    cache_config = {"cache": cache_section}

    config = {}
    if os.path.isdir(url):
        config = _get_remote_config(url)
    config.update(cache_config)

    if for_write:
        root_dir = path
        scm = None
        repo_rev = None
    else:
        root_dir = os.path.realpath(path)
        scm = Git(root_dir)
        repo_rev = rev

    repo_kwargs = dict(
        root_dir=root_dir,
        url=url,
        scm=scm,
        rev=repo_rev,
        config=config,
        repo_factory=erepo_factory(url, cache_config),
        **kwargs,
    )
    # Caller-supplied values win; these are only fallbacks.
    repo_kwargs.setdefault("subrepos", True)
    repo_kwargs.setdefault("uninitialized", True)

    repo = Repo(**repo_kwargs)

    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        # Only translate errors that belong to this repo.
        if exc.repo is not repo:
            raise
        raise NoOutputInExternalRepoError(
            exc.output, repo.root_dir, url
        ) from exc
    except FileMissingError as exc:
        raise PathMissingError(exc.path, url) from exc
    finally:
        repo.close()
        if for_write:
            _remove(path)
Esempio n. 7
0
def _external_repo(url=None, rev=None, cache_dir=None):
    """Return the path of a prepared clone for ``(url, rev, cache_dir)``.

    Results are memoised in ``REPO_CACHE``. On a miss, a fresh temporary
    directory is filled either by copying the pristine clone already cached
    for ``url`` or by cloning ``url`` (in which case a pristine copy is cached
    too). The clone is then checked out at ``rev`` and its local config is
    adjusted.

    Args:
        url: Location of the repository to clone.
        rev: Revision to check out; ``None`` skips the checkout.
        cache_dir: Cache directory to set in the clone's local config, if any.

    Returns:
        Path of the prepared clone directory.
    """
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    if key in REPO_CACHE:
        return REPO_CACHE[key]

    new_path = tempfile.mkdtemp("dvc-erepo")
    pristine_key = (url, None, None)

    if pristine_key in REPO_CACHE:
        # Copy and adjust existing clone.
        # This one unlike shutil.copytree() works with an existing dir
        copy_tree(REPO_CACHE[pristine_key], new_path)
    else:
        # Create a new clone
        _clone_repo(url, new_path)

        # Save clean clone dir so that we will have access to a default branch
        pristine_path = tempfile.mkdtemp("dvc-erepo")
        copy_tree(new_path, pristine_path)
        REPO_CACHE[pristine_key] = pristine_path

    # Adjust new clone/copy to fit rev and cache_dir

    # Checkout needs to be done first because current branch might not be
    # DVC repository
    if rev is not None:
        _git_checkout(new_path, rev)

    repo = Repo(new_path)
    try:
        # check if the URL is local and no default remote is present
        # add default remote pointing to the original repo's cache location
        if os.path.isdir(url):
            remotes = RemoteConfig(repo.config)
            if not _default_remote_set(remotes):
                source = Repo(url)
                try:
                    upstream_cache = source.cache.local.cache_dir
                    remotes.add(
                        "auto-generated-upstream",
                        upstream_cache,
                        default=True,
                        level=Config.LEVEL_LOCAL,
                    )
                finally:
                    source.close()

        if cache_dir is not None:
            CacheConfig(repo.config).set_dir(
                cache_dir, level=Config.LEVEL_LOCAL
            )
    finally:
        # Need to close/reopen repo to force config reread
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path
Esempio n. 8
0
    def run(self):
        """Run ``self._run()`` only when a DVC repo can be opened.

        Returns:
            ``0`` if opening (or closing) the repo raises
            ``NotDvcRepoError``; otherwise whatever ``self._run()`` returns.
        """
        from dvc.repo import Repo

        try:
            # Open and immediately close: this is only a probe.
            Repo().close()
        except NotDvcRepoError:
            return 0

        return self._run()
Esempio n. 9
0
def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
    """Yield a ``Repo`` opened on a prepared checkout of an external repo.

    This is a generator (presumably wrapped as a context manager by a
    decorator outside this view).

    Args:
        url: Location of the external repository.
        rev: Revision to use when ``rev_lock`` is not given.
        rev_lock: Revision that takes precedence over ``rev`` when truthy.
        cache_dir: Cache directory forwarded to ``_external_repo``.

    Yields:
        The opened ``Repo``.

    Raises:
        RemoteNotSpecifiedInExternalRepoError: When the consumer raises
            ``NoRemoteError``.
    """
    from dvc.repo import Repo

    path = _external_repo(url=url, rev=rev_lock or rev, cache_dir=cache_dir)
    repo = Repo(path)
    try:
        yield repo
    except NoRemoteError as exc:
        raise RemoteNotSpecifiedInExternalRepoError(url, cause=exc)
    finally:
        # Close on every exit path. Previously any exception, including the
        # translated one above, skipped the close and leaked the repo.
        repo.close()
Esempio n. 10
0
    def _repro_dvc(
        cls,
        dvc_dir: Optional[str],
        rel_cwd: Optional[str],
        log_errors: bool,
        pidfile: Optional[str] = None,
        git_url: Optional[str] = None,
        **kwargs,
    ):
        """Yield an open ``Repo`` for a repro run, optionally with a pidfile.

        This is a generator (presumably wrapped as a context manager by a
        decorator outside this view). When ``dvc_dir`` is given, the working
        directory is switched to the repo root (or to ``rel_cwd`` below it)
        and restored at the end. When ``pidfile`` is given, executor info is
        written into it before the run and the file is removed afterwards.

        Args:
            dvc_dir: Path handed to ``Repo``. ``None`` leaves the cwd alone.
            rel_cwd: Optional directory, relative to the repo root, to run in.
            log_errors: When true, exceptions other than
                ``CheckpointKilledError`` are logged before being re-raised.
            pidfile: Optional YAML file to update with the executor info.
            git_url: Stored in the executor info written to ``pidfile``.
            **kwargs: Accepted but not used in this body.

        Yields:
            The opened ``Repo``.
        """
        from dvc.repo import Repo
        from dvc.utils.serialize import modify_yaml

        dvc = Repo(dvc_dir)
        if cls.QUIET:
            dvc.scm.quiet = cls.QUIET

        # The previous cwd is only recorded when we actually move away from it.
        old_cwd: Optional[str] = None
        if dvc_dir is not None:
            old_cwd = os.getcwd()
            run_dir = (
                os.path.join(dvc.root_dir, rel_cwd) if rel_cwd else dvc.root_dir
            )
            os.chdir(run_dir)

        if pidfile is not None:
            info = ExecutorInfo(
                os.getpid(),
                git_url,
                dvc.scm.get_rev(),
                cls.DEFAULT_LOCATION,
            )
            with modify_yaml(pidfile) as contents:
                contents.update(info.to_dict())
        logger.debug("Running repro in '%s'", os.getcwd())

        try:
            yield dvc
        except CheckpointKilledError:
            # Propagated as-is, never logged.
            raise
        except DvcException:
            if log_errors:
                logger.exception("")
            raise
        except Exception:
            if log_errors:
                logger.exception("unexpected error")
            raise
        finally:
            if pidfile is not None:
                remove(pidfile)
            dvc.close()
            if old_cwd:
                os.chdir(old_cwd)
Esempio n. 11
0
    def _fix_upstream(self):
        """Adjust the upstream remote when ``self.url`` is a local directory.

        Does nothing for non-directory URLs. Otherwise opens the source repo
        at ``self.url`` and either fixes the configured default remote or, if
        none is configured, adds an upstream. The source repo is always
        closed.
        """
        if not os.path.isdir(self.url):
            return

        default_remote = self.config["core"].get("remote")
        source = Repo(self.url)
        try:
            if not default_remote:
                self._add_upstream(source)
            else:
                self._fix_local_remote(source, default_remote)
        finally:
            source.close()
Esempio n. 12
0
def external_repo(url=None, rev=None, rev_lock=None, cache_dir=None):
    """Yield a ``Repo`` opened on a prepared checkout of an external repo.

    This is a generator (presumably wrapped as a context manager by a
    decorator outside this view).

    Args:
        url: Location of the external repository.
        rev: Revision to use when ``rev_lock`` is not given.
        rev_lock: Revision that takes precedence over ``rev`` when truthy.
        cache_dir: Cache directory forwarded to ``_external_repo``.

    Yields:
        The opened ``Repo``.

    Raises:
        NoRemoteInExternalRepoError: When the consumer raises
            ``NoRemoteError``.
        NoOutputInExternalRepoError: When the consumer raises
            ``OutputNotFoundError`` whose ``repo`` attribute is this repo.
    """
    from dvc.repo import Repo

    path = _external_repo(url=url, rev=rev_lock or rev, cache_dir=cache_dir)
    repo = Repo(path)
    try:
        yield repo
    except NoRemoteError as exc:
        raise NoRemoteInExternalRepoError(url) from exc
    except OutputNotFoundError as exc:
        # Only translate errors that belong to this repo.
        if exc.repo is repo:
            raise NoOutputInExternalRepoError(
                exc.output, repo.root_dir, url
            ) from exc
        raise
    finally:
        # Close on every exit path. Previously any exception raised by the
        # consumer skipped the close and leaked the repo.
        repo.close()
Esempio n. 13
0
    def _set_upstream(self):
        """Add a default remote pointing at the source repo's local cache.

        Only acts when ``self.url`` is a local directory and no default
        remote is configured. The new remote is named
        ``"auto-generated-upstream"``.
        """
        # check if the URL is local and no default remote is present
        if not os.path.isdir(self.url):
            return
        if self.config["core"].get("remote"):
            return

        # add default remote pointing to the original repo's cache location
        source = Repo(self.url)
        try:
            upstream_cache = source.cache.local.cache_dir
        finally:
            source.close()

        self.config["remote"]["auto-generated-upstream"] = {
            "url": upstream_cache
        }
        self.config["core"]["remote"] = "auto-generated-upstream"
Esempio n. 14
0
 def _set_upstream(self):
     """Add a default remote pointing at the source repo's local cache.

     Only acts when ``self.url`` is a local directory and the remote config
     reports no default. The remote is added as ``"auto-generated-upstream"``
     at ``Config.LEVEL_LOCAL`` and marked as default.
     """
     # check if the URL is local and no default remote is present
     if not os.path.isdir(self.url):
         return
     remotes = RemoteConfig(self.config)
     if remotes.has_default():
         return

     # add default remote pointing to the original repo's cache location
     source = Repo(self.url)
     try:
         remotes.add(
             "auto-generated-upstream",
             source.cache.local.cache_dir,
             default=True,
             level=Config.LEVEL_LOCAL,
         )
     finally:
         source.close()
Esempio n. 15
0
    def _run(self):
        """Merge the three dvcfiles named in ``self.args``.

        Loads ``ancestor``, ``our`` and ``their`` as unverified ``Dvcfile``
        objects and calls ``merge`` on ``our``. The repo is always closed.

        Returns:
            ``0`` when the merge completes without raising.
        """
        from dvc.dvcfile import Dvcfile
        from dvc.repo import Repo

        dvc = Repo()

        def _load(path):
            # All three files are opened the same way, without verification.
            return Dvcfile(dvc, path, verify=False)

        try:
            base = _load(self.args.ancestor)
            ours = _load(self.args.our)
            theirs = _load(self.args.their)

            ours.merge(base, theirs)

            return 0
        finally:
            dvc.close()
Esempio n. 16
0
File: base.py Progetto: pared/dvc
    def _repro_dvc(
        cls,
        info: "ExecutorInfo",
        log_errors: bool = True,
        infofile: Optional[str] = None,
        **kwargs,
    ):
        """Yield an open ``Repo`` for a repro run described by ``info``.

        This is a generator (presumably wrapped as a context manager by a
        decorator outside this view). The working directory is switched to
        ``info.wdir`` under the SCM root when set, otherwise to the DVC root,
        and is restored at the end. When ``infofile`` is given, ``info`` is
        written into it both before the run and again during cleanup.

        Args:
            info: Executor description; ``root_dir``, ``dvc_dir``, ``wdir``
                and ``asdict()`` are used.
            log_errors: When true, exceptions other than
                ``CheckpointKilledError`` are logged before being re-raised.
            infofile: Optional JSON file to update with ``info.asdict()``.
            **kwargs: Accepted but not used in this body.

        Yields:
            The opened ``Repo``.
        """
        from dvc.repo import Repo
        from dvc.stage.monitor import CheckpointKilledError
        from dvc.utils.fs import makedirs
        from dvc.utils.serialize import modify_json

        def _dump_info():
            # Merge the current state of ``info`` into the JSON infofile.
            with modify_json(infofile) as contents:
                contents.update(info.asdict())

        dvc = Repo(os.path.join(info.root_dir, info.dvc_dir))
        if cls.QUIET:
            dvc.scm_context.quiet = cls.QUIET

        old_cwd = os.getcwd()
        run_dir = (
            os.path.join(dvc.scm.root_dir, info.wdir)
            if info.wdir
            else dvc.root_dir
        )
        os.chdir(run_dir)

        if infofile is not None:
            makedirs(os.path.dirname(infofile), exist_ok=True)
            _dump_info()

        try:
            logger.debug("Running repro in '%s'", os.getcwd())
            yield dvc
        except CheckpointKilledError:
            # Propagated as-is, never logged.
            raise
        except DvcException:
            if log_errors:
                logger.exception("")
            raise
        except Exception:
            if log_errors:
                logger.exception("unexpected error")
            raise
        finally:
            # Written again so the file reflects ``info`` as it is at exit.
            if infofile is not None:
                _dump_info()
            dvc.close()
            os.chdir(old_cwd)
Esempio n. 17
0
    def _is_git_file(repo_dir, path):
        """Tell whether ``path`` should be treated as a plain git file.

        Args:
            repo_dir: Directory to open as a DVC repo.
            path: Path to look up; absolute paths are rejected outright.

        Returns:
            ``False`` for absolute paths. ``True`` when ``repo_dir`` is not a
            DVC repo or no output matches ``path``. Otherwise the negation of
            the matching output's ``use_cache``.
        """
        from dvc.repo import Repo

        if os.path.isabs(path):
            return False

        try:
            repo = Repo(repo_dir)
        except NotDvcRepoError:
            return True

        try:
            is_git = not repo.find_out_by_relpath(path).use_cache
        except OutputNotFoundError:
            return True
        finally:
            repo.close()
        return is_git
Esempio n. 18
0
    def _fix_upstream(self, repo):
        """Adjust ``repo``'s upstream remote when ``self.url`` is local.

        Does nothing when ``self.url`` is not a directory or when the
        matching location inside it is not a DVC repo. Otherwise either fixes
        the default remote configured in ``repo`` or, if none is configured,
        adds an upstream. The source repo is always closed.

        Args:
            repo: Repo whose config is inspected. Its ``root_dir`` relative
                to ``self.root_dir`` selects the source repo under
                ``self.url``.
        """
        if not os.path.isdir(self.url):
            return

        try:
            sub_path = os.path.relpath(repo.root_dir, self.root_dir)
            source = Repo(PathInfo(self.url) / sub_path)
        except NotDvcRepoError:
            return

        try:
            default_remote = repo.config["core"].get("remote")
            if not default_remote:
                self._add_upstream(repo, source)
            else:
                self._fix_local_remote(repo, source, default_remote)
        finally:
            source.close()
Esempio n. 19
0
def _external_repo(url=None, rev=None, cache_dir=None):
    """Return the path of a prepared clone for ``(url, rev, cache_dir)``.

    Results are memoised in ``REPO_CACHE``. On a miss, a fresh temporary
    directory is filled either by copying the pristine clone already cached
    for ``url`` or by cloning ``url`` (in which case a pristine copy is cached
    too). The clone is then checked out at ``rev`` and, if requested, its
    local cache dir is set.

    Args:
        url: Location of the repository to clone.
        rev: Revision to check out; ``None`` skips the checkout.
        cache_dir: Cache directory to set in the clone's local config, if any.

    Returns:
        Path of the prepared clone directory.
    """
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    if key in REPO_CACHE:
        return REPO_CACHE[key]

    new_path = tempfile.mkdtemp("dvc-erepo")
    pristine_key = (url, None, None)

    if pristine_key in REPO_CACHE:
        # Copy and adjust existing clone.
        # This one unlike shutil.copytree() works with an existing dir
        copy_tree(REPO_CACHE[pristine_key], new_path)
    else:
        # Create a new clone
        _clone_repo(url, new_path)

        # Save clean clone dir so that we will have access to a default branch
        pristine_path = tempfile.mkdtemp("dvc-erepo")
        copy_tree(new_path, pristine_path)
        REPO_CACHE[pristine_key] = pristine_path

    # Adjust new clone/copy to fit rev and cache_dir

    # Checkout needs to be done first because current branch might not be
    # DVC repository
    if rev is not None:
        _git_checkout(new_path, rev)

    repo = Repo(new_path)
    try:
        if cache_dir is not None:
            CacheConfig(repo.config).set_dir(
                cache_dir, level=Config.LEVEL_LOCAL
            )
    finally:
        # Need to close/reopen repo to force config reread
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path
Esempio n. 20
0
    def _fix_upstream(self):
        """Adjust the upstream remote when ``self.url`` is a local DVC repo.

        Does nothing when ``self.url`` is not a directory or cannot be opened
        as a DVC repo. Otherwise either fixes the configured default remote
        or, if none is configured, adds an upstream. The source repo is
        always closed.
        """
        if not os.path.isdir(self.url):
            return

        try:
            source = Repo(self.url)
        except NotDvcRepoError:
            # If ExternalRepo does not throw NotDvcRepoError and Repo does,
            # the self.url might be a bare git repo.
            # NOTE: This will fail to resolve remote with relative path,
            # same as if it was a remote DVC repo.
            return

        try:
            default_remote = self.config["core"].get("remote")
            if not default_remote:
                self._add_upstream(source)
            else:
                self._fix_local_remote(source, default_remote)
        finally:
            source.close()
Esempio n. 21
0
def _external_repo(url=None, rev=None, cache_dir=None):
    """Return the path of a prepared clone for ``(url, rev, cache_dir)``.

    Results are memoised in ``REPO_CACHE``. On a miss, the clone comes from
    ``cached_clone(url, rev=rev)`` and its local config is adjusted before
    the path is cached and returned.

    Args:
        url: Location of the repository to clone.
        rev: Revision forwarded to ``cached_clone``.
        cache_dir: Cache directory to set in the clone's local config, if any.

    Returns:
        Path of the prepared clone directory.
    """
    from dvc.config import Config
    from dvc.cache import CacheConfig
    from dvc.repo import Repo

    key = (url, rev, cache_dir)
    if key in REPO_CACHE:
        return REPO_CACHE[key]

    new_path = cached_clone(url, rev=rev)

    repo = Repo(new_path)
    try:
        # check if the URL is local and no default remote is present
        # add default remote pointing to the original repo's cache location
        if os.path.isdir(url):
            remotes = RemoteConfig(repo.config)
            if not _default_remote_set(remotes):
                source = Repo(url)
                try:
                    upstream_cache = source.cache.local.cache_dir
                    remotes.add(
                        "auto-generated-upstream",
                        upstream_cache,
                        default=True,
                        level=Config.LEVEL_LOCAL,
                    )
                finally:
                    source.close()

        if cache_dir is not None:
            CacheConfig(repo.config).set_dir(
                cache_dir, level=Config.LEVEL_LOCAL
            )
    finally:
        # Need to close/reopen repo to force config reread
        repo.close()

    REPO_CACHE[key] = new_path
    return new_path