def should_test():
    """Decide whether the Azure remote tests should run.

    An explicit ``DVC_TEST_AZURE`` env var takes precedence; otherwise
    testing is enabled only when both Azure storage env vars are set.

    Returns:
        bool: True if Azure tests should run.
    """
    do_test = env2bool("DVC_TEST_AZURE", undefined=None)
    if do_test is not None:
        return do_test
    # Normalize to a real bool: the bare `and` expression would leak the
    # raw connection-string value (or None) to callers, unlike the other
    # should_test helpers which return True/False.
    return bool(
        os.getenv("AZURE_STORAGE_CONTAINER_NAME")
        and os.getenv("AZURE_STORAGE_CONNECTION_STRING")
    )
def __init__(
    self,
    iterable=None,
    disable=None,
    level=logging.ERROR,
    desc=None,
    leave=False,
    bar_format=None,
    bytes=False,  # pylint: disable=redefined-builtin
    file=None,
    total=None,
    postfix=None,
    **kwargs,
):
    """Progress-bar wrapper that auto-disables itself when appropriate.

    bytes : shortcut for
        `unit='B', unit_scale=True, unit_divisor=1024, miniters=1`
    desc : persists after `close()`
    level : effective logging level for determining `disable`;
        used only if `disable` is unspecified
    disable : If (default: None) or False,
        will be determined by logging level.
        May be overridden to `True` due to non-TTY status.
        Skip override by specifying env var `DVC_IGNORE_ISATTY`.
    kwargs : anything accepted by `tqdm.tqdm()`
    """
    kwargs = kwargs.copy()
    if bytes:
        # byte-mode defaults may still be overridden by explicit kwargs
        kwargs = {**self.BYTES_DEFAULTS, **kwargs}
    else:
        # only auto-scale large known totals; unknown totals always scale
        kwargs.setdefault("unit_scale", total > 999 if total else True)
    if file is None:
        file = sys.stderr
    # auto-disable based on `logger.level`
    if not disable:
        disable = logger.getEffectiveLevel() > level
    # auto-disable based on TTY (unless DVC_IGNORE_ISATTY is set)
    if (
        not disable
        and not env2bool(DVC_IGNORE_ISATTY)
        and hasattr(file, "isatty")
    ):
        disable = not file.isatty()
    # bar_format="!" is a placeholder; the real format is chosen below
    # once the total/position are known
    super().__init__(
        iterable=iterable,
        disable=disable,
        leave=leave,
        desc=desc,
        bar_format="!",
        lock_args=(False, ),
        total=total,
        **kwargs,
    )
    self.postfix = postfix or {"info": ""}
    if bar_format is None:
        if self.__len__():
            # nested bars get a distinct format based on their position
            self.bar_format = (
                self.BAR_FMT_DEFAULT_NESTED if self.pos else self.BAR_FMT_DEFAULT
            )
        else:
            self.bar_format = self.BAR_FMT_NOTOTAL
    else:
        self.bar_format = bar_format
    self.refresh()
def should_test():
    """Decide whether the Aliyun OSS remote tests should run.

    An explicit ``DVC_TEST_OSS`` env var takes precedence; otherwise
    testing is enabled only when all three OSS credential env vars exist.

    Returns:
        bool: True if OSS tests should run.
    """
    do_test = env2bool("DVC_TEST_OSS", undefined=None)
    if do_test is not None:
        return do_test
    # Normalize to a real bool instead of leaking the raw secret value
    # (or None) that the chained `and` expression would return.
    return bool(
        os.getenv("OSS_ENDPOINT")
        and os.getenv("OSS_ACCESS_KEY_ID")
        and os.getenv("OSS_ACCESS_KEY_SECRET")
    )
def machine(self):
    """Return a ``MachineManager`` when the machine feature is usable.

    Requires a repo tmp dir plus either the `feature.machine` config flag
    or the `DVC_TEST` env var; otherwise returns None.
    """
    from dvc.machine import MachineManager

    if not self.tmp_dir:
        return None
    feature_on = self.config["feature"].get("machine", False)
    if feature_on or env2bool("DVC_TEST"):
        return MachineManager(self)
    return None
def __init__(
    self,
    iterable=None,
    disable=None,
    level=logging.ERROR,
    desc=None,
    leave=False,
    bar_format=None,
    bytes=False,  # pylint: disable=W0622
    file=None,
    **kwargs
):
    """Progress-bar wrapper that auto-disables itself when appropriate.

    bytes : shortcut for
        `unit='B', unit_scale=True, unit_divisor=1024, miniters=1`
    desc : persists after `close()`
    level : effective logging level for determining `disable`;
        used only if `disable` is unspecified
    disable : If (default: None), will be determined by logging level.
        May be overridden to `True` due to non-TTY status.
        Skip override by specifying env var `DVC_IGNORE_ISATTY`.
    kwargs : anything accepted by `tqdm.tqdm()`
    """
    kwargs = kwargs.copy()
    kwargs.setdefault("unit_scale", True)
    if bytes:
        # byte-mode defaults may still be overridden by explicit kwargs
        bytes_defaults = dict(
            unit="B", unit_scale=True, unit_divisor=1024, miniters=1
        )
        kwargs = merge(bytes_defaults, kwargs)
    if file is None:
        file = sys.stderr
    # remembered so the description survives close()
    self.desc_persist = desc
    # auto-disable based on `logger.level`
    if disable is None:
        disable = logger.getEffectiveLevel() > level
    # auto-disable based on TTY (unless DVC_IGNORE_ISATTY is set)
    if (
        not disable
        and not env2bool("DVC_IGNORE_ISATTY")
        and hasattr(file, "isatty")
    ):
        disable = not file.isatty()
    # bar_format="!" is a placeholder; the real format is chosen below
    super(Tqdm, self).__init__(
        iterable=iterable,
        disable=disable,
        leave=leave,
        desc=desc,
        bar_format="!",
        **kwargs
    )
    if bar_format is None:
        if self.__len__():
            self.bar_format = self.BAR_FMT_DEFAULT
        else:
            self.bar_format = self.BAR_FMT_NOTOTAL
    else:
        self.bar_format = bar_format
    self.refresh()
def check(self):
    """Run the update check unless suppressed.

    Skipped in CI, under DVC tests, for snap packages, or when update
    checking is disabled.
    """
    from dvc.utils import env2bool

    # guard clauses preserve the original short-circuit order
    if os.getenv("CI"):
        return
    if env2bool("DVC_TEST"):
        return
    if PKG == "snap":
        return
    if not self.is_enabled():
        return
    self._with_lock(self._check, "checking")
def _should_test_aws():
    """Decide whether AWS remote tests should run.

    An explicit ``DVC_TEST_AWS`` env var takes precedence; otherwise both
    AWS credential env vars must be present.
    """
    explicit = env2bool("DVC_TEST_AWS", undefined=None)
    if explicit is not None:
        return explicit
    key = os.getenv("AWS_ACCESS_KEY_ID")
    secret = os.getenv("AWS_SECRET_ACCESS_KEY")
    return bool(key and secret)
def is_enabled():
    """Report whether analytics collection is enabled.

    Always False under DVC tests; otherwise governed by the
    `core.analytics` config option (default "true").
    """
    if env2bool("DVC_TEST"):
        return False
    core = Config(validate=False).get("core", {})
    enabled = to_bool(core.get("analytics", "true"))
    state = "en" if enabled else "dis"
    logger.debug("Analytics is {}abled.".format(state))
    return enabled
def should_test():
    """Decide whether GCP remote tests should run.

    An explicit ``DVC_TEST_GCP`` env var takes precedence; otherwise the
    GCP credentials file must exist on disk.
    """
    override = env2bool("DVC_TEST_GCP", undefined=None)
    if override is not None:
        return override
    # os.path.exists already yields the bool the original if/else produced
    return os.path.exists(TEST_GCP_CREDS_FILE)
def should_test():
    """Decide whether Aliyun OSS remote tests should run.

    An explicit ``DVC_TEST_OSS`` env var takes precedence; otherwise both
    OSS credential env vars must be present.
    """
    override = env2bool("DVC_TEST_OSS", undefined=None)
    if override is not None:
        return override
    return bool(
        os.getenv("OSS_ACCESS_KEY_ID") and os.getenv("OSS_ACCESS_KEY_SECRET")
    )
def test_quiet_notty(caplog, capsys):
    """Progress bar must stay silent on a non-TTY stream at INFO level."""
    with caplog.at_level(logging.INFO, logger="dvc"):
        # consume the iterable so the bar would have a chance to render
        list(Tqdm(range(10)))
        captured = capsys.readouterr()
        assert captured.out == ""
        if env2bool("DVC_IGNORE_ISATTY"):
            # TTY check explicitly skipped: the bar is drawn to stderr
            assert "0/10" in captured.err
        else:
            assert captured.err == ""
def is_enabled():
    """Report whether analytics collection is enabled.

    Always False under DVC tests; otherwise governed by the analytics
    option in the core config section (default "true").
    """
    if env2bool("DVC_TEST"):
        return False
    core_section = Config(validate=False).config.get(Config.SECTION_CORE, {})
    raw_value = core_section.get(Config.SECTION_CORE_ANALYTICS, "true")
    enabled = to_bool(raw_value)
    state = "en" if enabled else "dis"
    logger.debug("Analytics is {}abled.".format(state))
    return enabled
def __init__(self, repo):
    """Set up experiments support for *repo*.

    Raises NotImplementedError unless experiments are enabled via config
    or the DVC_TEST env var. Also creates the SCM lock used to serialize
    experiment git operations.
    """
    from dvc.lock import make_lock

    # env var is checked first so the config lookup is skipped under test
    enabled = env2bool("DVC_TEST") or repo.config["core"].get(
        "experiments", False
    )
    if not enabled:
        raise NotImplementedError

    self.repo = repo
    lock_path = os.path.join(self.repo.tmp_dir, "exp_scm_lock")
    self.scm_lock = make_lock(lock_path, tmp_dir=self.repo.tmp_dir)
def is_enabled(cmd=None):
    """Report whether analytics collection is enabled for *cmd*.

    Always False under DVC tests or for daemon commands; otherwise
    governed by the analytics option in the core config section.
    """
    from dvc.config import Config, to_bool
    from dvc.command.daemon import CmdDaemonBase

    # same short-circuit order as before: env var, then daemon check
    if env2bool("DVC_TEST") or isinstance(cmd, CmdDaemonBase):
        return False

    core = Config(validate=False).config.get(Config.SECTION_CORE, {})
    enabled = to_bool(core.get(Config.SECTION_CORE_ANALYTICS, "true"))
    state = "enabled" if enabled else "disabled"
    logger.debug("Analytics is {}.".format(state))
    return enabled
def should_test():
    """Decide whether SSH remote tests should run.

    An explicit ``DVC_TEST_SSH`` env var takes precedence; otherwise a
    passwordless SSH connection to localhost must succeed.
    """
    override = env2bool("DVC_TEST_SSH", undefined=None)
    if override is not None:
        return override

    # FIXME: enable on windows
    if os.name == "nt":
        return False

    probe = ["ssh", "-o", "BatchMode=yes", "127.0.0.1", "ls"]
    try:
        check_output(probe)
    except (CalledProcessError, OSError):
        return False
    return True
def is_enabled():
    """Report whether analytics collection is enabled.

    Always False under DVC tests; opted out via the DVC_NO_ANALYTICS env
    var; otherwise governed by the `core.analytics` config option.
    """
    from dvc.config import Config, to_bool
    from dvc.utils import env2bool

    if env2bool("DVC_TEST"):
        return False

    enabled = not os.getenv(DVC_NO_ANALYTICS)
    if enabled:
        # env var opt-out wins; only then consult the config file
        core = Config(validate=False).get("core", {})
        enabled = to_bool(core.get("analytics", "true"))

    state = "en" if enabled else "dis"
    logger.debug("Analytics is {}abled.".format(state))
    return enabled
def is_enabled(cmd=None):
    """Report whether analytics collection is enabled for *cmd*.

    Always False under DVC tests or for daemon commands; otherwise
    resolved from the command's config (or the current repo config).
    """
    from dvc.command.daemon import CmdDaemonBase

    # same short-circuit order as before: env var, then daemon check
    if env2bool("DVC_TEST") or isinstance(cmd, CmdDaemonBase):
        return False

    if cmd is None or not hasattr(cmd, "config"):
        config = Analytics._get_current_config()
    else:
        config = cmd.config
    assert config is not None

    enabled = Analytics._is_enabled_config(config)
    state = "enabled" if enabled else "disabled"
    logger.debug("Analytics is {}.".format(state))
    return enabled
def should_test():
    """Decide whether GCP remote tests should run.

    An explicit ``DVC_TEST_GCP`` env var takes precedence; otherwise the
    creds file must exist and activating the gcloud service account with
    it must succeed.
    """
    override = env2bool("DVC_TEST_GCP", undefined=None)
    if override is not None:
        return override

    if not os.path.exists(TEST_GCP_CREDS_FILE):
        return False

    activate_cmd = [
        "gcloud",
        "auth",
        "activate-service-account",
        "--key-file",
        TEST_GCP_CREDS_FILE,
    ]
    try:
        check_output(activate_cmd)
    except (CalledProcessError, OSError):
        return False
    return True
def checkpoint_callback(
    cls,
    dvc: "Repo",
    scm: "Git",
    name: Optional[str],
    force: bool,
    unchanged: Iterable["PipelineStage"],
    stages: Iterable["PipelineStage"],
):
    """Commit one checkpoint experiment iteration.

    Hashes the run (changed + unchanged stages), commits it as a
    checkpoint, optionally auto-pushes, and logs the new revision.
    An unchanged experiment is silently skipped.
    """
    all_stages = list(stages) + list(unchanged)
    try:
        exp_hash = cls.hash_exp(all_stages)
        exp_rev = cls.commit(
            scm, exp_hash, exp_name=name, force=force, checkpoint=True
        )

        if env2bool(DVC_EXP_AUTO_PUSH):
            cls._auto_push(dvc, scm, os.getenv(DVC_EXP_GIT_REMOTE))
        logger.info("Checkpoint experiment iteration '%s'.", exp_rev[:7])
    except UnchangedExperimentError:
        # nothing changed since the previous iteration; skip the commit
        pass
def reproduce(
    cls,
    info: "ExecutorInfo",
    rev: str,
    queue: Optional["Queue"] = None,
    infofile: Optional[str] = None,
    log_errors: bool = True,
    log_level: Optional[int] = None,
    **kwargs,
) -> "ExecutorResult":
    """Run dvc repro and return the result.

    Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
        experiment hash (or None on error), exp_ref is the experiment ref,
        and force is a bool specifying whether or not this experiment
        should force overwrite any existing duplicates.
    """
    from dvc.repo.checkout import checkout as dvc_checkout
    from dvc.repo.reproduce import reproduce as dvc_reproduce
    from dvc.stage import PipelineStage

    auto_push = env2bool(DVC_EXP_AUTO_PUSH)
    git_remote = os.getenv(DVC_EXP_GIT_REMOTE, None)

    # pipeline stages reported back by repro as unchanged; collected via
    # the on_unchanged callback below
    unchanged = []

    if queue is not None:
        # let the parent process know which rev this worker is handling
        queue.put((rev, os.getpid()))
    if log_errors and log_level is not None:
        cls._set_log_level(log_level)

    def filter_pipeline(stages):
        # keep only PipelineStage instances (ignore other stage types)
        unchanged.extend(
            [stage for stage in stages if isinstance(stage, PipelineStage)]
        )

    exp_hash: Optional[str] = None
    exp_ref: Optional["ExpRefInfo"] = None
    repro_force: bool = False

    if infofile is not None:
        # persist executor state before the run so monitors can see it
        info.dump_json(infofile)

    with cls._repro_dvc(
        info,
        log_errors=log_errors,
        **kwargs,
    ) as dvc:
        if auto_push:
            cls._validate_remotes(dvc, git_remote)
        args, kwargs = cls._repro_args(dvc)
        if args:
            targets: Optional[Union[list, str]] = args[0]
        else:
            targets = kwargs.get("targets")

        repro_force = kwargs.get("force", False)
        logger.trace(  # type: ignore[attr-defined]
            "Executor repro with force = '%s'", str(repro_force)
        )

        repro_dry = kwargs.get("dry")

        # NOTE: checkpoint outs are handled as a special type of persist
        # out:
        #
        # - checkpoint out may not yet exist if this is the first time this
        #   experiment has been run, this is not an error condition for
        #   experiments
        # - if experiment was run with --reset, the checkpoint out will be
        #   removed at the start of the experiment (regardless of any
        #   dvc.lock entry for the checkpoint out)
        # - if run without --reset, the checkpoint out will be checked out
        #   using any hash present in dvc.lock (or removed if no entry
        #   exists in dvc.lock)
        checkpoint_reset: bool = kwargs.pop("reset", False)
        if not repro_dry:
            dvc_checkout(
                dvc,
                targets=targets,
                with_deps=targets is not None,
                force=True,
                quiet=True,
                allow_missing=True,
                checkpoint_reset=checkpoint_reset,
                recursive=kwargs.get("recursive", False),
            )

        # called by repro at each checkpoint iteration
        checkpoint_func = partial(
            cls.checkpoint_callback,
            dvc,
            dvc.scm,
            info.name,
            repro_force or checkpoint_reset,
        )
        stages = dvc_reproduce(
            dvc,
            *args,
            on_unchanged=filter_pipeline,
            checkpoint_func=checkpoint_func,
            **kwargs,
        )

        exp_hash = cls.hash_exp(stages)
        if not repro_dry:
            # commit the experiment and record the results on `info`
            ref, exp_ref, repro_force = cls._repro_commit(
                dvc,
                info,
                stages,
                exp_hash,
                checkpoint_reset,
                auto_push,
                git_remote,
                repro_force,
            )
            info.result_hash = exp_hash
            info.result_ref = ref
            info.result_force = repro_force

    if infofile is not None:
        # persist the final executor state (including results)
        info.dump_json(infofile)

    # ideally we would return stages here like a normal repro() call, but
    # stages is not currently picklable and cannot be returned across
    # multiprocessing calls
    return ExecutorResult(exp_hash, exp_ref, repro_force)
def check(self):
    """Run the update check unless running in CI or under DVC tests."""
    # guard clauses preserve the original short-circuit order
    if os.getenv("CI"):
        return
    if env2bool("DVC_TEST"):
        return
    self._with_lock(self._check, "checking")
def __init__(self, repo):
    """Set up experiments support for *repo*.

    Raises NotImplementedError unless experiments are enabled via config
    or the DVC_TEST env var.
    """
    # env var is checked first so the config lookup is skipped under test
    enabled = env2bool("DVC_TEST") or repo.config["core"].get(
        "experiments", False
    )
    if not enabled:
        raise NotImplementedError
    self.repo = repo
def reproduce(
    cls,
    dvc_dir: Optional[str],
    rev: str,
    queue: Optional["Queue"] = None,
    rel_cwd: Optional[str] = None,
    name: Optional[str] = None,
    log_errors: bool = True,
    log_level: Optional[int] = None,
    **kwargs,
) -> "ExecutorResult":
    """Run dvc repro and return the result.

    Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
        experiment hash (or None on error), exp_ref is the experiment ref,
        and force is a bool specifying whether or not this experiment
        should force overwrite any existing duplicates.
    """
    from dvc.repo.checkout import checkout as dvc_checkout
    from dvc.repo.reproduce import reproduce as dvc_reproduce

    auto_push = env2bool(DVC_EXP_AUTO_PUSH)
    git_remote = os.getenv(DVC_EXP_GIT_REMOTE, None)

    # pipeline stages reported back by repro as unchanged; collected via
    # the on_unchanged callback below
    unchanged = []

    if queue is not None:
        # let the parent process know which rev this worker is handling
        queue.put((rev, os.getpid()))
    if log_errors and log_level is not None:
        cls._set_log_level(log_level)

    def filter_pipeline(stages):
        # keep only PipelineStage instances (ignore other stage types)
        unchanged.extend([
            stage for stage in stages if isinstance(stage, PipelineStage)
        ])

    exp_hash: Optional[str] = None
    exp_ref: Optional["ExpRefInfo"] = None
    repro_force: bool = False

    with cls._repro_dvc(
        dvc_dir,
        rel_cwd,
        log_errors,
        **kwargs,
    ) as dvc:
        if auto_push:
            cls._validate_remotes(dvc, git_remote)
        args, kwargs = cls._repro_args(dvc)
        if args:
            targets: Optional[Union[list, str]] = args[0]
        else:
            targets = kwargs.get("targets")

        repro_force = kwargs.get("force", False)
        logger.trace(  # type: ignore[attr-defined]
            "Executor repro with force = '%s'", str(repro_force))

        repro_dry = kwargs.get("dry")

        # NOTE: checkpoint outs are handled as a special type of persist
        # out:
        #
        # - checkpoint out may not yet exist if this is the first time this
        #   experiment has been run, this is not an error condition for
        #   experiments
        # - if experiment was run with --reset, the checkpoint out will be
        #   removed at the start of the experiment (regardless of any
        #   dvc.lock entry for the checkpoint out)
        # - if run without --reset, the checkpoint out will be checked out
        #   using any hash present in dvc.lock (or removed if no entry
        #   exists in dvc.lock)
        checkpoint_reset: bool = kwargs.pop("reset", False)
        if not repro_dry:
            dvc_checkout(
                dvc,
                targets=targets,
                with_deps=targets is not None,
                force=True,
                quiet=True,
                allow_missing=True,
                checkpoint_reset=checkpoint_reset,
                recursive=kwargs.get("recursive", False),
            )

        # called by repro at each checkpoint iteration
        checkpoint_func = partial(
            cls.checkpoint_callback,
            dvc,
            dvc.scm,
            name,
            repro_force or checkpoint_reset,
        )
        stages = dvc_reproduce(
            dvc,
            *args,
            on_unchanged=filter_pipeline,
            checkpoint_func=checkpoint_func,
            **kwargs,
        )

        exp_hash = cls.hash_exp(stages)
        if not repro_dry:
            try:
                is_checkpoint = any(
                    stage.is_checkpoint for stage in stages
                )
                if is_checkpoint and checkpoint_reset:
                    # For reset checkpoint stages, we need to force
                    # overwriting existing checkpoint refs even though
                    # repro may not have actually been run with --force
                    repro_force = True
                cls.commit(
                    dvc.scm,
                    exp_hash,
                    exp_name=name,
                    force=repro_force,
                    checkpoint=is_checkpoint,
                )
                if auto_push:
                    cls._auto_push(dvc, dvc.scm, git_remote)
            except UnchangedExperimentError:
                # nothing changed; no experiment commit is created
                pass
            ref = dvc.scm.get_ref(EXEC_BRANCH, follow=False)
            if ref:
                exp_ref = ExpRefInfo.from_ref(ref)
            if cls.WARN_UNTRACKED:
                untracked = dvc.scm.untracked_files()
                if untracked:
                    logger.warning(
                        "The following untracked files were present in "
                        "the experiment directory after reproduction but "
                        "will not be included in experiment commits:\n"
                        "\t%s",
                        ", ".join(untracked),
                    )

    # ideally we would return stages here like a normal repro() call, but
    # stages is not currently picklable and cannot be returned across
    # multiprocessing calls
    return ExecutorResult(exp_hash, exp_ref, repro_force)
def __init__(
    self,
    root_dir=None,
    scm=None,
    rev=None,
    subrepos=False,
    uninitialized=False,
    config=None,
    url=None,
    repo_factory=None,
):
    """Initialize a DVC repo rooted at *root_dir* (or discovered from cwd).

    scm/rev select a revision-pinned (read-only) view of the repo backed
    by the SCM filesystem; without them a local filesystem repo with real
    locking and state is created.
    """
    from dvc.config import Config
    from dvc.data_cloud import DataCloud
    from dvc.fs.local import LocalFileSystem
    from dvc.lock import LockNoop, make_lock
    from dvc.machine import MachineManager
    from dvc.objects.db import ODBManager
    from dvc.repo.live import Live
    from dvc.repo.metrics import Metrics
    from dvc.repo.params import Params
    from dvc.repo.plots import Plots
    from dvc.repo.stage import StageLoad
    from dvc.scm import SCM
    from dvc.stage.cache import StageCache
    from dvc.state import State, StateNoop

    self.url = url
    self._fs_conf = {"repo_factory": repo_factory}

    # a rev without an scm instance implies discovering the SCM first
    if rev and not scm:
        scm = SCM(root_dir or os.curdir)

    self.root_dir, self.dvc_dir, self.tmp_dir = self._get_repo_dirs(
        root_dir=root_dir, scm=scm, rev=rev, uninitialized=uninitialized
    )

    # SCM-backed fs for revision-pinned repos, local fs otherwise
    if scm:
        self._fs = scm.get_fs(rev)
    else:
        self._fs = LocalFileSystem(url=self.root_dir)

    self.config = Config(self.dvc_dir, fs=self.fs, config=config)
    self._uninitialized = uninitialized
    self._scm = scm

    # used by RepoFileSystem to determine if it should traverse subrepos
    self.subrepos = subrepos

    self.cloud = DataCloud(self)
    self.stage = StageLoad(self)

    # revision-pinned or dvc-dir-less repos are read-only: no real lock,
    # no state database
    if scm or not self.dvc_dir:
        self.lock = LockNoop()
        self.state = StateNoop()
        self.odb = ODBManager(self)
    else:
        self.lock = make_lock(
            os.path.join(self.tmp_dir, "lock"),
            tmp_dir=self.tmp_dir,
            hardlink_lock=self.config["core"].get("hardlink_lock", False),
            friendly=True,
        )

        # NOTE: storing state and link_state in the repository itself to
        # avoid any possible state corruption in 'shared cache dir'
        # scenario.
        self.state = State(self.root_dir, self.tmp_dir, self.dvcignore)
        self.odb = ODBManager(self)

        self.stage_cache = StageCache(self)

        self._ignore()

    self.metrics = Metrics(self)
    self.plots = Plots(self)
    self.params = Params(self)
    self.live = Live(self)

    # machine support requires a tmp dir plus the feature flag (or tests)
    if self.tmp_dir and (
        self.config["feature"].get("machine", False)
        or env2bool("DVC_TEST")
    ):
        self.machine = MachineManager(self)
    else:
        self.machine = None

    # optional callback invoked when stage collection fails
    self.stage_collection_error_handler: Optional[
        Callable[[str, Exception], None]
    ] = None
    # reentrancy counter for rwlocked operations
    self._lock_depth = 0