def _get_repo_dirs(
    self,
    root_dir: str = None,
    fs: "FileSystem" = None,
    uninitialized: bool = False,
    scm: "Base" = None,
):
    """Resolve the repo root together with its DVC and tmp directories.

    Args:
        root_dir: Starting directory handed to ``self.find_root``.
        fs: Filesystem handed to ``self.find_root``; ``localfs`` is used
            for path joining when it is falsy.
        uninitialized: When true, a ``NotDvcRepoError`` from the lookup is
            tolerated instead of being re-raised.
        scm: SCM object to take the root from in the tolerated case; one
            is created here when it is falsy.

    Returns:
        ``(root_dir, dvc_dir, tmp_dir)``.  ``dvc_dir`` and ``tmp_dir`` are
        ``None`` when the lookup failed and the failure was tolerated.

    Raises:
        NotDvcRepoError: Lookup failed and ``uninitialized`` is false.
    """
    from dvc.fs import localfs
    from dvc.scm import SCM, SCMError

    dvc_dir = tmp_dir = None
    try:
        root_dir = self.find_root(root_dir, fs)
        fs = fs or localfs
        dvc_dir = fs.path.join(root_dir, self.DVC_DIR)
        tmp_dir = fs.path.join(dvc_dir, "tmp")
    except NotDvcRepoError:
        if not uninitialized:
            raise

        if not scm:
            try:
                scm = SCM(root_dir or os.curdir)
            except SCMError:
                # Retry against the current directory with no_scm=True.
                scm = SCM(os.curdir, no_scm=True)

        # Only override the root when either piece of caller input is missing.
        if not (fs and root_dir):
            root_dir = scm.root_dir

    assert root_dir
    return root_dir, dvc_dir, tmp_dir
def _get_repo_dirs(
    self,
    root_dir: str = None,
    fs: "FileSystem" = None,
    uninitialized: bool = False,
):
    """Locate the repo root and create its tmp directory.

    Args:
        root_dir: Starting directory handed to ``self.find_root``.
        fs: Filesystem handed to ``self.find_root``.
        uninitialized: When true, a ``NotDvcRepoError`` is tolerated and
            the root is taken from an SCM object instead.

    Returns:
        ``(root_dir, dvc_dir, tmp_dir)``; the last two are ``None`` when
        the lookup failed and the failure was tolerated.

    Raises:
        NotDvcRepoError: Lookup failed and ``uninitialized`` is false.
    """
    from dvc.scm import SCM, Base, SCMError
    from dvc.utils.fs import makedirs

    dvc_dir, tmp_dir = None, None
    try:
        root_dir = self.find_root(root_dir, fs)
        dvc_dir = os.path.join(root_dir, self.DVC_DIR)
        tmp_dir = os.path.join(dvc_dir, "tmp")
        makedirs(tmp_dir, exist_ok=True)
    except NotDvcRepoError:
        if not uninitialized:
            raise

        try:
            fallback_scm = SCM(root_dir or os.curdir)
        except SCMError:
            # Retry against the current directory with no_scm=True.
            fallback_scm = SCM(os.curdir, no_scm=True)

        assert isinstance(fallback_scm, Base)
        root_dir = fallback_scm.root_dir

    return root_dir, dvc_dir, tmp_dir
def _get_repo_dirs(
    self,
    root_dir: str = None,
    scm: "Base" = None,
    rev: str = None,
    uninitialized: bool = False,
):
    """Locate the repo root and create its tmp directory.

    ``scm`` and ``rev`` must be given together or not at all (asserted).
    When ``scm`` is a ``Git`` instance and ``rev`` is truthy, the root
    lookup runs against a ``GitFileSystem`` for that revision; otherwise
    ``None`` is passed as the filesystem.

    Returns:
        ``(root_dir, dvc_dir, tmp_dir)``; the last two are ``None`` when
        the lookup failed and ``uninitialized`` is true.

    Raises:
        NotDvcRepoError: Lookup failed and ``uninitialized`` is false.
    """
    assert bool(scm) == bool(rev)

    from dvc.fs.scm import GitFileSystem
    from dvc.scm import SCM, Base, Git, SCMError
    from dvc.utils.fs import makedirs

    dvc_dir = tmp_dir = None
    try:
        if isinstance(scm, Git) and rev:
            fs = GitFileSystem(scm=scm, rev=rev)
        else:
            fs = None

        root_dir = self.find_root(root_dir, fs)
        dvc_dir = os.path.join(root_dir, self.DVC_DIR)
        tmp_dir = os.path.join(dvc_dir, "tmp")
        makedirs(tmp_dir, exist_ok=True)
    except NotDvcRepoError:
        if not uninitialized:
            raise

        try:
            fallback_scm = SCM(root_dir or os.curdir)
        except SCMError:
            # Retry against the current directory with no_scm=True.
            fallback_scm = SCM(os.curdir, no_scm=True)

        assert isinstance(fallback_scm, Base)
        root_dir = fallback_scm.root_dir

    return root_dir, dvc_dir, tmp_dir
def _get_repo_dirs(
    self,
    root_dir: str = None,
    scm: Base = None,
    rev: str = None,
    uninitialized: bool = False,
):
    """Locate the repo root and create its tmp directory.

    ``scm`` and ``rev`` must be given together or not at all (asserted).
    When ``scm`` is a ``Git`` instance and ``rev`` is truthy, the root
    lookup runs against ``scm.get_tree(rev)``; otherwise ``None`` is
    passed as the tree.

    Returns:
        ``(root_dir, dvc_dir, tmp_dir)``; the last two are ``None`` when
        the lookup failed and ``uninitialized`` is true.

    Raises:
        NotDvcRepoError: Lookup failed and ``uninitialized`` is false.
    """
    assert bool(scm) == bool(rev)

    from dvc.scm import SCM
    from dvc.scm.git import Git
    from dvc.utils.fs import makedirs

    dvc_dir = tmp_dir = None
    try:
        if isinstance(scm, Git) and rev:
            tree = scm.get_tree(rev)
        else:
            tree = None

        root_dir = self.find_root(root_dir, tree)
        dvc_dir = os.path.join(root_dir, self.DVC_DIR)
        tmp_dir = os.path.join(dvc_dir, "tmp")
        makedirs(tmp_dir, exist_ok=True)
    except NotDvcRepoError:
        if not uninitialized:
            raise

        try:
            fallback_scm = SCM(root_dir or os.curdir)
        except (SCMError, InvalidGitRepositoryError):
            # Retry against the current directory with no_scm=True.
            fallback_scm = SCM(os.curdir, no_scm=True)

        assert isinstance(fallback_scm, Base)
        root_dir = fallback_scm.root_dir

    return root_dir, dvc_dir, tmp_dir
def scm(self):
    """Return the SCM object for this repo.

    A truthy ``self._scm`` is returned as-is.  Otherwise an ``SCM`` is
    built for ``self.root_dir`` using the ``core.no_scm`` config value
    (default ``False``).  If that raises ``SCMError`` and the repo was
    opened with ``_uninitialized`` set, a second attempt is made with
    ``no_scm=True``; in every other case the error propagates.
    """
    from dvc.scm import SCM, SCMError

    if self._scm:
        return self._scm

    no_scm = self.config["core"].get("no_scm", False)
    try:
        return SCM(self.root_dir, no_scm=no_scm)
    except SCMError:
        if not self._uninitialized:
            raise
        # might not be a git/dvc repo at all
        # used in `params/metrics/plots/live` targets
        return SCM(self.root_dir, no_scm=True)
def init(root_dir=os.curdir):
    """Initiate a dvc project in a directory.

    Creates the ``Project.DVC_DIR`` directory under ``root_dir``,
    initializes config, cache and state inside it, registers the cache
    dir, state file and lock file with the SCM ignore list, and adds the
    config file and the ignore file to the SCM.

    Args:
        root_dir: Path to project's root directory.

    Returns:
        Project instance.

    Raises:
        OSError: From ``os.mkdir`` if the directory cannot be created
            (for example because it already exists).
    """
    project_root = os.path.abspath(root_dir)
    dvc_dir = os.path.join(project_root, Project.DVC_DIR)
    os.mkdir(dvc_dir)

    cfg = Config.init(dvc_dir)
    cache = Cache.init(dvc_dir)
    state = State.init(project_root, dvc_dir)
    lock = Lock(dvc_dir)

    scm = SCM(project_root)
    ignored = [cache.cache_dir, state.state_file, lock.lock_file]
    scm.ignore_list(ignored)

    ignore_path = os.path.join(dvc_dir, scm.ignore_file())
    scm.add([cfg.config_file, ignore_path])

    return Project(project_root)
def __init__(self, root_dir=None):
    """Open the project found from ``root_dir``.

    ``root_dir`` is resolved through ``self.find_root`` and normalized
    with ``realpath``/``abspath``.  Config, SCM, lock, state, cache,
    cloud and updater objects are then created in that order, the log
    level is taken from the core config section, and finally
    ``_ignore()`` and ``updater.check()`` are run.
    """
    from dvc.config import Config
    from dvc.state import State
    from dvc.lock import Lock
    from dvc.scm import SCM
    from dvc.cache import Cache
    from dvc.data_cloud import DataCloud
    from dvc.updater import Updater

    located_root = self.find_root(root_dir)
    resolved_root = os.path.realpath(located_root)
    self.root_dir = os.path.abspath(resolved_root)
    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

    self.config = Config(self.dvc_dir)
    self.scm = SCM(self.root_dir, project=self)
    self.lock = Lock(self.dvc_dir)

    # NOTE: storing state and link_state in the repository itself to avoid
    # any possible state corruption in 'shared cache dir' scenario.
    self.state = State(self, self.config.config)

    core_section = self.config.config[Config.SECTION_CORE]
    log_level = core_section.get(Config.SECTION_CORE_LOGLEVEL)
    logger.set_level(log_level)

    self.cache = Cache(self)
    self.cloud = DataCloud(self, config=self.config.config)
    self.updater = Updater(self.dvc_dir)

    self.files_to_git_add = []

    self._ignore()
    self.updater.check()
def __init__(self, root_dir):
    """Open the project rooted at ``root_dir``.

    The root is normalized with ``realpath``/``abspath``; config, SCM,
    lock, state, link state, logger, cache, cloud, updater and prompt
    objects are created in that order, then ``_ignore()`` and
    ``updater.check()`` are run.
    """
    from dvc.logger import Logger
    from dvc.config import Config
    from dvc.state import LinkState, State
    from dvc.lock import Lock
    from dvc.scm import SCM
    from dvc.cache import Cache
    from dvc.data_cloud import DataCloud
    from dvc.updater import Updater
    from dvc.prompt import Prompt

    resolved_root = os.path.realpath(root_dir)
    self.root_dir = os.path.abspath(resolved_root)
    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

    self.config = Config(self.dvc_dir)
    self.scm = SCM(self.root_dir)
    self.lock = Lock(self.dvc_dir)

    # NOTE: storing state and link_state in the repository itself to avoid
    # any possible state corruption in 'shared cache dir' scenario.
    self.state = State(self)
    self.link_state = LinkState(self)

    core_section = self.config._config[Config.SECTION_CORE]
    log_level = core_section.get(Config.SECTION_CORE_LOGLEVEL, None)
    self.logger = Logger(log_level)

    self.cache = Cache(self)
    self.cloud = DataCloud(self, config=self.config._config)
    self.updater = Updater(self.dvc_dir)
    self.prompt = Prompt()

    self._ignore()
    self.updater.check()
def __init__(self, root_dir=None, scm=None, rev=None):
    """Open a repo either from the working tree or from an SCM revision.

    Args:
        root_dir: Starting directory handed to ``self.find_root``.
        scm: When truthy, the repo tree comes from ``scm.get_tree(rev)``,
            ``scm`` itself is stored as ``self.scm`` and the state is a
            ``StateNoop``.  When falsy, a ``WorkingTree`` is used, the SCM
            is built from the ``core.no_scm`` config value and the state
            is built from the local cache.
        rev: Revision passed to ``scm.get_tree``.
    """
    from dvc.state import State, StateNoop
    from dvc.lock import make_lock
    from dvc.scm import SCM
    from dvc.cache import Cache
    from dvc.data_cloud import DataCloud
    from dvc.repo.metrics import Metrics
    from dvc.repo.plots import Plots
    from dvc.repo.params import Params
    from dvc.scm.tree import WorkingTree
    from dvc.utils.fs import makedirs
    from dvc.stage.cache import StageCache

    if scm:
        # use GitTree instead of WorkingTree as default repo tree instance
        tree = scm.get_tree(rev)
        self.root_dir = self.find_root(root_dir, tree)
        self.scm = scm
        self.tree = tree
        self.state = StateNoop()
    else:
        located_root = self.find_root(root_dir)
        self.root_dir = os.path.abspath(os.path.realpath(located_root))
        self.tree = WorkingTree(self.root_dir)

    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)
    self.config = Config(self.dvc_dir, tree=self.tree)

    if not scm:
        skip_scm = self.config["core"].get("no_scm", False)
        self.scm = SCM(self.root_dir, no_scm=skip_scm)

    self.tmp_dir = os.path.join(self.dvc_dir, "tmp")
    self.index_dir = os.path.join(self.tmp_dir, "index")
    makedirs(self.index_dir, exist_ok=True)

    use_hardlink_lock = self.config["core"].get("hardlink_lock", False)
    lock_path = os.path.join(self.tmp_dir, "lock")
    self.lock = make_lock(
        lock_path,
        tmp_dir=self.tmp_dir,
        hardlink_lock=use_hardlink_lock,
        friendly=True,
    )

    self.cache = Cache(self)
    self.cloud = DataCloud(self)

    if not scm:
        # NOTE: storing state and link_state in the repository itself to
        # avoid any possible state corruption in 'shared cache dir'
        # scenario.
        self.state = State(self.cache.local)

    self.stage_cache = StageCache(self)

    self.metrics = Metrics(self)
    self.plots = Plots(self)
    self.params = Params(self)

    self._ignore()
def init(root_dir=os.curdir, no_scm=False):
    """Initiate a dvc project in a directory.

    Args:
        root_dir: Path to project's root directory.
        no_scm: When true, do not fail if ``SCM(root_dir)`` yields a bare
            ``Base`` instance.

    Returns:
        Project instance.

    Raises:
        InitError: ``SCM(root_dir)`` is exactly of type ``Base`` and
            ``no_scm`` is false.
    """
    project_root = os.path.abspath(root_dir)
    dvc_dir = os.path.join(project_root, Project.DVC_DIR)

    scm = SCM(project_root)
    # Exact-type check on purpose (not isinstance) -- presumably concrete
    # SCM classes derive from Base; confirm against dvc.scm.
    if not no_scm and type(scm) is Base:
        raise InitError(
            "{} is not tracked by any supported scm tool(e.g. git).".format(
                project_root
            )
        )

    os.mkdir(dvc_dir)
    cfg = Config.init(dvc_dir)
    project = Project(project_root)

    scm.add([cfg.config_file])
    if scm.ignore_file():
        ignore_path = os.path.join(dvc_dir, scm.ignore_file())
        scm.add([ignore_path])

    return project
def test_init_sub_dir(tmp_dir):
    """SCM created for a subdirectory reports the repo root as its root."""
    repo_root = os.fspath(tmp_dir)
    Repo.init(repo_root)

    nested = tmp_dir / "dir"
    nested.mkdir()

    nested_scm = SCM(os.fspath(nested))
    assert nested_scm.root_dir == repo_root
def test_branch(self):
    """Walking a GitTree of ``master`` yields the committed layout."""
    scm = SCM(self._root_dir)
    scm.add([self.DATA_SUB_DIR])
    scm.commit("add data_dir/data_sub_dir/data_sub")
    tree = GitTree(self.git, "master")

    root = self._root_dir
    data_dir = join(root, "data_dir")
    data_sub_dir = join(root, "data_dir", "data_sub_dir")

    # Full walk from the tree root.
    full_walk = tree.walk(".")
    expected_full = [
        (root, ["data_dir"], ["code.py"]),
        (data_dir, ["data_sub_dir"], []),
        (data_sub_dir, [], ["data_sub"]),
    ]
    self.assertWalkEqual(full_walk, expected_full)

    # Walk restricted to the nested subdirectory.
    sub_walk = tree.walk(join("data_dir", "data_sub_dir"))
    expected_sub = [(data_sub_dir, [], ["data_sub"])]
    self.assertWalkEqual(sub_walk, expected_sub)
def _scm_in_use():
    """Return the class name of the SCM used by the current repo.

    Returns ``NoSCM.__name__`` when building the SCM raises ``SCMError``
    and ``None`` when no repo root is found (``NotDvcRepoError``).
    """
    try:
        return type(SCM(root_dir=Repo.find_root())).__name__
    except SCMError:
        return NoSCM.__name__
    except NotDvcRepoError:
        return None
def init(root_dir=os.curdir, no_scm=False, force=False):
    """Initiate a dvc project in a directory.

    Args:
        root_dir: Path to project's root directory.
        no_scm: When true, do not fail if ``SCM(root_dir)`` yields a bare
            ``Base`` instance.
        force: When true, an existing ``Project.DVC_DIR`` directory is
            removed and recreated instead of causing an error.

    Returns:
        Project instance.

    Raises:
        InitError: The directory is not tracked by an SCM (and ``no_scm``
            is false), or the dvc directory exists and ``force`` is false.
    """
    import colorama
    import shutil
    from dvc.scm import SCM, Base
    from dvc.config import Config
    from dvc.logger import logger

    project_root = os.path.abspath(root_dir)
    dvc_dir = os.path.join(project_root, Project.DVC_DIR)

    scm = SCM(project_root)
    # Exact-type check on purpose (not isinstance) -- presumably concrete
    # SCM classes derive from Base; confirm against dvc.scm.
    if not no_scm and type(scm) is Base:
        untracked_msg = "{} is not tracked by any supported scm tool(e.g. git)."
        raise InitError(untracked_msg.format(project_root))

    already_there = os.path.isdir(dvc_dir)
    if already_there and not force:
        exists_msg = "'{}' exists. Use '-f' to force."
        raise InitError(exists_msg.format(os.path.relpath(dvc_dir)))
    if already_there:
        shutil.rmtree(dvc_dir)

    os.mkdir(dvc_dir)

    cfg = Config.init(dvc_dir)
    project = Project(project_root)

    scm.add([cfg.config_file])
    if scm.ignore_file():
        scm.add([os.path.join(dvc_dir, scm.ignore_file())])

    logger.info('\nYou can now commit the changes to git.')

    palette = {
        "yellow": colorama.Fore.YELLOW,
        "blue": colorama.Fore.BLUE,
        "green": colorama.Fore.GREEN,
        "nc": colorama.Fore.RESET,
    }
    banner = (
        "\n"
        "{yellow}What's next?{nc}\n"
        "{yellow}------------{nc}\n"
        "- Check out the documentation: {blue}https://dvc.org/doc{nc}\n"
        "- Get help and share ideas: {blue}https://dvc.org/chat{nc}\n"
        "- Star us on GitHub: {blue}https://github.com/iterative/dvc{nc}"
    )
    logger.info(banner.format(**palette))

    return project
def init(root_dir=os.curdir, no_scm=False, force=False):
    """Create an empty project in the given directory.

    The ``Project.DVC_DIR`` directory is created under ``root_dir`` and
    the config is initialized inside it.  The directory should be tracked
    by an SCM unless ``no_scm`` is set (`--no-scm`).  If the dvc
    directory already exists, ``force`` (`--force`) is required to
    replace it.

    Args:
        root_dir: Path to project's root directory.
        no_scm: Allow a directory that is not tracked by any SCM.
        force: Remove and recreate an existing dvc directory.

    Returns:
        Project instance.

    Raises:
        InitError: The directory is not tracked by an SCM (and ``no_scm``
            is false), or the dvc directory exists and ``force`` is false.
    """
    import shutil
    from dvc.scm import SCM, Base
    from dvc.config import Config

    project_root = os.path.abspath(root_dir)
    dvc_dir = os.path.join(project_root, Project.DVC_DIR)

    scm = SCM(project_root)
    # Exact-type check on purpose (not isinstance) -- presumably concrete
    # SCM classes derive from Base; confirm against dvc.scm.
    if not no_scm and type(scm) is Base:
        raise InitError(
            "{project} is not tracked by any supported scm tool"
            " (e.g. git). Use '--no-scm' if you don't want to use any scm.".format(
                project=project_root
            )
        )

    already_there = os.path.isdir(dvc_dir)
    if already_there and not force:
        raise InitError(
            "'{project}' exists. Use '-f' to force.".format(
                project=os.path.relpath(dvc_dir)
            )
        )
    if already_there:
        shutil.rmtree(dvc_dir)

    os.mkdir(dvc_dir)

    cfg = Config.init(dvc_dir)
    project = Project(project_root)

    scm.add([cfg.config_file])
    if scm.ignore_file:
        scm.add([os.path.join(dvc_dir, scm.ignore_file)])

    logger.info("\nYou can now commit the changes to git.\n")
    project._welcome_message()

    return project
def __init__(self, root_dir):
    """Open the project rooted at ``root_dir``.

    The root is normalized with ``realpath``/``abspath``; SCM, lock,
    cache, state, config, logger and cloud objects are then created in
    that order.
    """
    resolved_root = os.path.realpath(root_dir)
    self.root_dir = os.path.abspath(resolved_root)
    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

    # Helpers keyed on the root and/or the dvc directory.
    self.scm = SCM(self.root_dir)
    self.lock = Lock(self.dvc_dir)
    self.cache = Cache(self.dvc_dir)
    self.state = State(self.root_dir, self.dvc_dir)

    # Config-driven helpers.
    self.config = Config(self.dvc_dir)
    self.logger = Logger(self.config._config)
    self.cloud = DataCloud(self.cache, self.config._config)
def _copy_if_git_file(self, to_path):
    """Copy ``self.def_path`` out of a cached clone if it is a git file.

    The clone directory comes from ``cached_clone(**self.def_repo)``.
    When ``self._is_git_file`` accepts the path, it is copied to the
    absolute form of ``to_path`` and the clone's current revision is
    stored in ``self.def_repo`` under ``PARAM_REV_LOCK``.

    Returns:
        ``True`` if the copy was made, ``False`` otherwise.
    """
    rel_path = self.def_path
    clone_dir = cached_clone(**self.def_repo)

    if self._is_git_file(clone_dir, rel_path):
        source = os.path.join(clone_dir, rel_path)
        destination = os.path.abspath(to_path)
        fs_copy(source, destination)

        locked_rev = SCM(clone_dir).get_rev()
        self.def_repo[self.PARAM_REV_LOCK] = locked_rev
        return True

    return False
def _scm_in_use():
    """Return the class name of the SCM used by the current repo.

    Returns ``NoSCM.__name__`` when building the SCM raises ``SCMError``
    and ``None`` when no repo root is found (``NotDvcRepoError``).
    """
    from dvc.exceptions import NotDvcRepoError
    from dvc.repo import Repo
    from dvc.scm import SCM, NoSCM
    from dvc.scm.base import SCMError

    try:
        return type(SCM(root_dir=Repo.find_root())).__name__
    except SCMError:
        return NoSCM.__name__
    except NotDvcRepoError:
        return None
def __init__(self, root_dir):
    """Open the project rooted at ``root_dir``.

    The root is normalized with ``realpath``/``abspath``; SCM, lock,
    cache, state, config, logger and cloud objects are then created in
    that order.  The logger receives the core section's log level (or
    ``None`` when unset).
    """
    resolved_root = os.path.realpath(root_dir)
    self.root_dir = os.path.abspath(resolved_root)
    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

    self.scm = SCM(self.root_dir)
    self.lock = Lock(self.dvc_dir)
    self.cache = Cache(self.dvc_dir)
    self.state = State(self.root_dir, self.dvc_dir)
    self.config = Config(self.dvc_dir)

    core_section = self.config._config[Config.SECTION_CORE]
    log_level = core_section.get(Config.SECTION_CORE_LOGLEVEL, None)
    self.logger = Logger(log_level)

    self.cloud = DataCloud(
        cache=self.cache,
        state=self.state,
        config=self.config._config,
    )
def __init__(self, root_dir=None):
    """Open the repo found from ``root_dir``.

    ``root_dir`` is resolved through ``self.find_root`` and normalized.
    Config, SCM, tree, tmp dir, lock, state, cache, cloud, metrics and
    tag objects are created in that order; the logger level is raised to
    the configured core log level when one is set; ``_ignore()`` runs
    last.
    """
    from dvc.state import State
    from dvc.lock import make_lock
    from dvc.scm import SCM
    from dvc.cache import Cache
    from dvc.data_cloud import DataCloud
    from dvc.repo.metrics import Metrics
    from dvc.scm.tree import WorkingTree
    from dvc.repo.tag import Tag
    from dvc.utils import makedirs

    located_root = self.find_root(root_dir)
    self.root_dir = os.path.abspath(os.path.realpath(located_root))
    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

    self.config = Config(self.dvc_dir)
    self.scm = SCM(self.root_dir)

    working_tree = WorkingTree(self.root_dir)
    self.tree = CleanTree(working_tree)

    self.tmp_dir = os.path.join(self.dvc_dir, "tmp")
    makedirs(self.tmp_dir, exist_ok=True)

    use_hardlink_lock = self.config.config["core"].get("hardlink_lock", False)
    # The lock file itself lives directly in the dvc dir, not in tmp.
    lock_path = os.path.join(self.dvc_dir, "lock")
    lock_tmp_dir = os.path.join(self.dvc_dir, "tmp")
    self.lock = make_lock(
        lock_path,
        tmp_dir=lock_tmp_dir,
        hardlink_lock=use_hardlink_lock,
        friendly=True,
    )

    # NOTE: storing state and link_state in the repository itself to avoid
    # any possible state corruption in 'shared cache dir' scenario.
    self.state = State(self, self.config.config)

    core_section = self.config.config[Config.SECTION_CORE]
    log_level = core_section.get(Config.SECTION_CORE_LOGLEVEL)
    if log_level:
        logger.setLevel(log_level.upper())

    self.cache = Cache(self)
    self.cloud = DataCloud(self)

    self.metrics = Metrics(self)
    self.tag = Tag(self)

    self._ignore()
def __init__(self, root_dir=None):
    """Open the repo found from ``root_dir``.

    ``root_dir`` is resolved through ``self.find_root`` and normalized.
    The SCM honours the ``core.no_scm`` config value and the lock honours
    ``core.hardlink_lock``.  The tmp/index directories are created on
    disk, and ``_ignore()`` runs last.
    """
    from dvc.state import State
    from dvc.lock import make_lock
    from dvc.scm import SCM
    from dvc.cache import Cache
    from dvc.data_cloud import DataCloud
    from dvc.repo.metrics import Metrics
    from dvc.repo.params import Params
    from dvc.scm.tree import WorkingTree
    from dvc.utils.fs import makedirs
    from dvc.stage.cache import StageCache

    located_root = self.find_root(root_dir)
    self.root_dir = os.path.abspath(os.path.realpath(located_root))
    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

    self.config = Config(self.dvc_dir)

    skip_scm = self.config["core"].get("no_scm", False)
    self.scm = SCM(self.root_dir, no_scm=skip_scm)

    self.tree = WorkingTree(self.root_dir)

    self.tmp_dir = os.path.join(self.dvc_dir, "tmp")
    self.index_dir = os.path.join(self.tmp_dir, "index")
    makedirs(self.index_dir, exist_ok=True)

    use_hardlink_lock = self.config["core"].get("hardlink_lock", False)
    lock_path = os.path.join(self.tmp_dir, "lock")
    self.lock = make_lock(
        lock_path,
        tmp_dir=self.tmp_dir,
        hardlink_lock=use_hardlink_lock,
        friendly=True,
    )

    # NOTE: storing state and link_state in the repository itself to avoid
    # any possible state corruption in 'shared cache dir' scenario.
    self.state = State(self)

    self.cache = Cache(self)
    self.cloud = DataCloud(self)

    stage_cache_dir = self.cache.local.cache_dir
    self.stage_cache = StageCache(stage_cache_dir)

    self.metrics = Metrics(self)
    self.params = Params(self)

    self._ignore()
def init(root_dir=os.curdir, no_scm=False, force=False):
    """Create an empty repo in the given directory.

    The ``Repo.DVC_DIR`` directory is created under ``root_dir`` and the
    config is initialized inside it.  The directory should be tracked by
    an SCM unless ``no_scm`` is set (`--no-scm`).  If the dvc directory
    already exists, ``force`` (`--force`) is required to replace it.

    Args:
        root_dir: Path to repo's root directory.
        no_scm: Allow a directory for which ``SCM`` yields a ``NoSCM``.
        force: Remove and recreate an existing dvc directory.

    Returns:
        Repo instance.

    Raises:
        InitError: The directory is not tracked by an SCM (and ``no_scm``
            is false), or the dvc directory exists and ``force`` is false.
    """
    repo_root = os.path.realpath(root_dir)
    dvc_dir = os.path.join(repo_root, Repo.DVC_DIR)

    scm = SCM(repo_root)
    if isinstance(scm, NoSCM) and not no_scm:
        raise InitError(
            "{repo} is not tracked by any supported scm tool (e.g. git). "
            "Use '--no-scm' if you don't want to use any scm.".format(
                repo=repo_root
            )
        )

    already_there = os.path.isdir(dvc_dir)
    if already_there and not force:
        raise InitError(
            "'{repo}' exists. Use '-f' to force.".format(repo=relpath(dvc_dir))
        )
    if already_there:
        remove(dvc_dir)

    os.mkdir(dvc_dir)

    cfg = Config.init(dvc_dir)
    repo = Repo(repo_root)

    scm.add([cfg.config_file])
    if scm.ignore_file:
        scm.add([os.path.join(dvc_dir, scm.ignore_file)])

    logger.info("\nYou can now commit the changes to git.\n")
    _welcome_message()

    return repo
def collect(self):
    """Fill ``self.info`` with version, binary flag, user id and system info.

    The SCM class name is added as well when a project root can be found;
    a ``NotDvcProjectError`` from the lookup is ignored.
    """
    from dvc.scm import SCM
    from dvc.utils import is_binary
    from dvc.project import Project
    from dvc.exceptions import NotDvcProjectError

    self.info[self.PARAM_DVC_VERSION] = VERSION
    self.info[self.PARAM_IS_BINARY] = is_binary()
    self.info[self.PARAM_USER_ID] = self._get_user_id()
    self.info[self.PARAM_SYSTEM_INFO] = self._collect_system_info()

    try:
        project_scm = SCM(root_dir=Project._find_root())
        self.info[self.PARAM_SCM_CLASS] = type(project_scm).__name__
    except NotDvcProjectError:
        # Not inside a project: leave the SCM class unset.
        return
def __init__(self, root_dir):
    """Open the project rooted at ``root_dir``.

    The root is normalized with ``realpath``/``abspath``; config, SCM,
    lock, link state, logger, cache, cloud and updater objects are
    created in that order, then ``_ignore()`` and ``updater.check()``
    are run.
    """
    resolved_root = os.path.realpath(root_dir)
    self.root_dir = os.path.abspath(resolved_root)
    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

    self.config = Config(self.dvc_dir)
    self.scm = SCM(self.root_dir)
    self.lock = Lock(self.dvc_dir)
    self.link_state = LinkState(self.root_dir, self.dvc_dir)

    core_section = self.config._config[Config.SECTION_CORE]
    log_level = core_section.get(Config.SECTION_CORE_LOGLEVEL, None)
    self.logger = Logger(log_level)

    self.cache = Cache(self)
    self.cloud = DataCloud(self, config=self.config._config)
    self.updater = Updater(self.dvc_dir)

    self._ignore()
    self.updater.check()
def __init__(self, root_dir=None):
    """Open the repo found from ``root_dir``.

    ``root_dir`` is resolved through ``self.find_root`` and normalized.
    Config, tree, SCM, lock, state, cache, cloud, updater, metrics, tag
    and pkg objects are created in that order; the logger level is raised
    to the configured core log level when one is set; ``_ignore()`` and
    ``updater.check()`` run last.
    """
    from dvc.config import Config
    from dvc.state import State
    from dvc.lock import Lock
    from dvc.scm import SCM
    from dvc.cache import Cache
    from dvc.data_cloud import DataCloud
    from dvc.updater import Updater
    from dvc.repo.metrics import Metrics
    from dvc.scm.tree import WorkingTree
    from dvc.repo.tag import Tag
    from dvc.repo.pkg import Pkg

    located_root = self.find_root(root_dir)
    self.root_dir = os.path.abspath(os.path.realpath(located_root))
    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

    self.config = Config(self.dvc_dir)

    self.tree = WorkingTree(self.root_dir)

    self.scm = SCM(self.root_dir, repo=self)
    self.lock = Lock(self.dvc_dir)

    # NOTE: storing state and link_state in the repository itself to avoid
    # any possible state corruption in 'shared cache dir' scenario.
    self.state = State(self, self.config.config)

    core_section = self.config.config[Config.SECTION_CORE]
    log_level = core_section.get(Config.SECTION_CORE_LOGLEVEL)
    if log_level:
        logger.setLevel(log_level.upper())

    self.cache = Cache(self)
    self.cloud = DataCloud(self, config=self.config.config)
    self.updater = Updater(self.dvc_dir)

    self.metrics = Metrics(self)
    self.tag = Tag(self)
    self.pkg = Pkg(self)

    self._ignore()
    self.updater.check()
def collect(self):
    """Fill ``self.info`` with the analytics report fields.

    Records the dvc version, binary flag, user id and system info.  The
    SCM class name is added as well when a repo root can be found; a
    ``NotDvcRepoError`` from the lookup is ignored.
    """
    from dvc.scm import SCM
    from dvc.utils import is_binary
    from dvc.repo import Repo
    from dvc.exceptions import NotDvcRepoError

    self.info[self.PARAM_DVC_VERSION] = __version__
    self.info[self.PARAM_IS_BINARY] = is_binary()
    self.info[self.PARAM_USER_ID] = self._get_user_id()
    self.info[self.PARAM_SYSTEM_INFO] = self._collect_system_info()

    try:
        repo_scm = SCM(root_dir=Repo.find_root())
        self.info[self.PARAM_SCM_CLASS] = type(repo_scm).__name__
    except NotDvcRepoError:
        # Not inside a repo: leave the SCM class unset.
        return
def init(root_dir=os.curdir, no_scm=False, force=False):
    """Initiate a dvc project in a directory.

    Args:
        root_dir: Path to project's root directory.
        no_scm: When true, do not fail if ``SCM(root_dir)`` yields a bare
            ``Base`` instance.
        force: When true, an existing ``Project.DVC_DIR`` directory is
            removed and recreated instead of causing an error.

    Returns:
        Project instance.

    Raises:
        InitError: The directory is not tracked by an SCM (and ``no_scm``
            is false), or the dvc directory exists and ``force`` is false.
    """
    import shutil
    from dvc.scm import SCM, Base
    from dvc.config import Config

    project_root = os.path.abspath(root_dir)
    dvc_dir = os.path.join(project_root, Project.DVC_DIR)

    scm = SCM(project_root)
    # Exact-type check on purpose (not isinstance) -- presumably concrete
    # SCM classes derive from Base; confirm against dvc.scm.
    if not no_scm and type(scm) is Base:
        untracked_msg = "{} is not tracked by any supported scm tool(e.g. git)."
        raise InitError(untracked_msg.format(project_root))

    already_there = os.path.isdir(dvc_dir)
    if already_there and not force:
        exists_msg = "'{}' exists. Use '-f' to force."
        raise InitError(exists_msg.format(os.path.relpath(dvc_dir)))
    if already_there:
        shutil.rmtree(dvc_dir)

    os.mkdir(dvc_dir)

    cfg = Config.init(dvc_dir)
    project = Project(project_root)

    scm.add([cfg.config_file])
    if scm.ignore_file():
        scm.add([os.path.join(dvc_dir, scm.ignore_file())])

    logger.info('\nYou can now commit the changes to git.\n')
    project._welcome_message()

    return project
def __init__(self, root_dir):
    """Open the project rooted at ``root_dir``.

    The root is normalized with ``realpath``/``abspath``; config, SCM,
    lock, state, link state, logger, cache, cloud and updater objects are
    created in that order, then ``_ignore()`` and ``updater.check()``
    are run.
    """
    resolved_root = os.path.realpath(root_dir)
    self.root_dir = os.path.abspath(resolved_root)
    self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

    self.config = Config(self.dvc_dir)
    self.scm = SCM(self.root_dir)
    self.lock = Lock(self.dvc_dir)

    # NOTE: storing state and link_state in the repository itself to avoid
    # any possible state corruption in 'shared cache dir' scenario.
    self.state = State(self)
    self.link_state = LinkState(self)

    core_section = self.config._config[Config.SECTION_CORE]
    log_level = core_section.get(Config.SECTION_CORE_LOGLEVEL, None)
    self.logger = Logger(log_level)

    self.cache = Cache(self)
    self.cloud = DataCloud(self, config=self.config._config)
    self.updater = Updater(self.dvc_dir)

    self._ignore()
    self.updater.check()
def test_git_submodule(self):
    """SCM for the current directory is detected as Git."""
    detected = SCM(os.curdir)
    self.assertIsInstance(detected, Git)
def test_git(self):
    """After Repo.init, SCM for the test root is detected as Git."""
    Repo.init(os.curdir)

    detected = SCM(self._root_dir)
    self.assertIsInstance(detected, Git)