Esempio n. 1
0
    def should_test():
        """Return True if Azure remote tests should run.

        `DVC_TEST_AZURE` (when set) overrides everything; otherwise the
        tests run only when both Azure storage env vars are configured.
        """
        do_test = env2bool("DVC_TEST_AZURE", undefined=None)
        if do_test is not None:
            return do_test

        # bool() so callers get a real boolean instead of the raw env-var
        # string (or None) that the bare `and` chain would yield
        return bool(
            os.getenv("AZURE_STORAGE_CONTAINER_NAME")
            and os.getenv("AZURE_STORAGE_CONNECTION_STRING")
        )
Esempio n. 2
0
 def __init__(
     self,
     iterable=None,
     disable=None,
     level=logging.ERROR,
     desc=None,
     leave=False,
     bar_format=None,
     bytes=False,  # pylint: disable=redefined-builtin
     file=None,
     total=None,
     postfix=None,
     **kwargs,
 ):
     """
     bytes   : shortcut for
         `unit='B', unit_scale=True, unit_divisor=1024, miniters=1`
     desc  : persists after `close()`
     level  : effective logging level for determining `disable`;
         used only if `disable` is unspecified
     disable  : If (default: None) or False,
         will be determined by logging level.
         May be overridden to `True` due to non-TTY status.
         Skip override by specifying env var `DVC_IGNORE_ISATTY`.
     kwargs  : anything accepted by `tqdm.tqdm()`
     """
     # work on a copy so the caller's kwargs dict is never mutated
     kwargs = kwargs.copy()
     if bytes:
         # explicit kwargs win over the byte-mode defaults
         kwargs = {**self.BYTES_DEFAULTS, **kwargs}
     else:
         # auto-enable unit scaling for large totals; when total is
         # unknown (None) or 0, default to scaling on
         kwargs.setdefault("unit_scale", total > 999 if total else True)
     if file is None:
         file = sys.stderr
     # NOTE(review): `file` is only used for the isatty check below and is
     # not forwarded to tqdm's __init__ -- confirm that is intentional.
     # auto-disable based on `logger.level`
     if not disable:
         disable = logger.getEffectiveLevel() > level
     # auto-disable based on TTY
     # NOTE(review): DVC_IGNORE_ISATTY is unquoted here (a module-level
     # constant holding the env-var name, presumably) -- confirm; the
     # older variant of this class passes the string literal instead.
     if (not disable and not env2bool(DVC_IGNORE_ISATTY)
             and hasattr(file, "isatty")):
         disable = not file.isatty()
     # "!" is a placeholder bar_format; the real format is assigned below
     # once the length/position are known
     super().__init__(
         iterable=iterable,
         disable=disable,
         leave=leave,
         desc=desc,
         bar_format="!",
         lock_args=(False, ),
         total=total,
         **kwargs,
     )
     self.postfix = postfix or {"info": ""}
     if bar_format is None:
         # known length: pick nested vs top-level default depending on
         # the bar's position; unknown length: the "no total" variant
         if self.__len__():
             self.bar_format = (self.BAR_FMT_DEFAULT_NESTED
                                if self.pos else self.BAR_FMT_DEFAULT)
         else:
             self.bar_format = self.BAR_FMT_NOTOTAL
     else:
         self.bar_format = bar_format
     self.refresh()
Esempio n. 3
0
    def should_test():
        """Return True if Aliyun OSS remote tests should run.

        `DVC_TEST_OSS` (when set) overrides everything; otherwise the
        tests run only when all three OSS env vars are configured.
        """
        do_test = env2bool("DVC_TEST_OSS", undefined=None)
        if do_test is not None:
            return do_test

        # bool() so callers get a real boolean instead of the raw env-var
        # string (or None) that the bare `and` chain would yield
        return bool(
            os.getenv("OSS_ENDPOINT")
            and os.getenv("OSS_ACCESS_KEY_ID")
            and os.getenv("OSS_ACCESS_KEY_SECRET")
        )
Esempio n. 4
0
    def machine(self):
        """Return a MachineManager when the feature is enabled, else None."""
        from dvc.machine import MachineManager

        # machines require a tmp dir and the feature flag (or test mode)
        if not self.tmp_dir:
            return None
        if self.config["feature"].get("machine", False) or env2bool("DVC_TEST"):
            return MachineManager(self)
        return None
Esempio n. 5
0
 def __init__(
     self,
     iterable=None,
     disable=None,
     level=logging.ERROR,
     desc=None,
     leave=False,
     bar_format=None,
     bytes=False,  # pylint: disable=W0622
     file=None,
     **kwargs
 ):
     """
     bytes   : shortcut for
         `unit='B', unit_scale=True, unit_divisor=1024, miniters=1`
     desc  : persists after `close()`
     level  : effective logging level for determining `disable`;
         used only if `disable` is unspecified
     disable  : If (default: None), will be determined by logging level.
         May be overridden to `True` due to non-TTY status.
         Skip override by specifying env var `DVC_IGNORE_ISATTY`.
     kwargs  : anything accepted by `tqdm.tqdm()`
     """
     # work on a copy so the caller's kwargs dict is never mutated
     kwargs = kwargs.copy()
     kwargs.setdefault("unit_scale", True)
     if bytes:
         bytes_defaults = dict(
             unit="B", unit_scale=True, unit_divisor=1024, miniters=1
         )
         # NOTE(review): `merge` presumably lets explicit kwargs win over
         # the byte-mode defaults (last argument takes precedence) --
         # confirm against its implementation.
         kwargs = merge(bytes_defaults, kwargs)
     if file is None:
         file = sys.stderr
     # NOTE(review): `file` is only used for the isatty check below and
     # is not forwarded to tqdm's __init__ -- confirm that is intentional.
     # remember `desc` so it can be restored after `close()`
     self.desc_persist = desc
     # auto-disable based on `logger.level`
     if disable is None:
         disable = logger.getEffectiveLevel() > level
     # auto-disable based on TTY
     if (
         not disable
         and not env2bool("DVC_IGNORE_ISATTY")
         and hasattr(file, "isatty")
     ):
         disable = not file.isatty()
     # "!" is a placeholder bar_format; the real format is assigned below
     # once the length is known
     super(Tqdm, self).__init__(
         iterable=iterable,
         disable=disable,
         leave=leave,
         desc=desc,
         bar_format="!",
         **kwargs
     )
     if bar_format is None:
         # known length: default format; unknown: the "no total" variant
         if self.__len__():
             self.bar_format = self.BAR_FMT_DEFAULT
         else:
             self.bar_format = self.BAR_FMT_NOTOTAL
     else:
         self.bar_format = bar_format
     self.refresh()
Esempio n. 6
0
    def check(self):
        """Run the update check unless we are in CI, tests, snap, or disabled."""
        from dvc.utils import env2bool

        skip = (
            os.getenv("CI")
            or env2bool("DVC_TEST")
            or PKG == "snap"
            or not self.is_enabled()
        )
        if skip:
            return

        self._with_lock(self._check, "checking")
Esempio n. 7
0
def _should_test_aws():
    """Return True if AWS remote tests should run.

    `DVC_TEST_AWS` (when set) overrides everything; otherwise the tests
    run only when both AWS credential env vars are present.
    """
    do_test = env2bool("DVC_TEST_AWS", undefined=None)
    if do_test is not None:
        return do_test

    # single boolean expression instead of if/return True/return False;
    # bool() normalizes the truthy env-var string
    return bool(
        os.getenv("AWS_ACCESS_KEY_ID") and os.getenv("AWS_SECRET_ACCESS_KEY")
    )
Esempio n. 8
0
def is_enabled():
    """Return True when analytics collection is enabled.

    Always disabled under DVC test runs; otherwise controlled by the
    `core.analytics` config option (default: enabled).
    """
    if env2bool("DVC_TEST"):
        return False

    raw = Config(validate=False).get("core", {}).get("analytics", "true")
    enabled = to_bool(raw)
    logger.debug("Analytics is {}abled.".format("en" if enabled else "dis"))

    return enabled
Esempio n. 9
0
    def should_test():
        """Return True if GCP remote tests should run.

        `DVC_TEST_GCP` (when set) overrides everything; otherwise require
        the test credentials file to exist.
        """
        do_test = env2bool("DVC_TEST_GCP", undefined=None)
        if do_test is not None:
            return do_test

        # os.path.exists already returns a bool; no need for the explicit
        # if/return False/return True dance
        return os.path.exists(TEST_GCP_CREDS_FILE)
Esempio n. 10
0
    def should_test():
        """Return True if Aliyun OSS remote tests should run.

        `DVC_TEST_OSS` (when set) overrides everything; otherwise the
        tests run only when both OSS credential env vars are present.
        """
        do_test = env2bool("DVC_TEST_OSS", undefined=None)
        if do_test is not None:
            return do_test

        # single boolean expression instead of if/return True/return False;
        # bool() normalizes the truthy env-var string
        return bool(
            os.getenv("OSS_ACCESS_KEY_ID")
            and os.getenv("OSS_ACCESS_KEY_SECRET")
        )
Esempio n. 11
0
def test_quiet_notty(caplog, capsys):
    """Progress bar must stay silent at INFO level when stderr is not a TTY."""
    with caplog.at_level(logging.INFO, logger="dvc"):
        # consume the iterable so the bar actually runs
        list(Tqdm(range(10)))
        captured = capsys.readouterr()
        assert captured.out == ""
        if not env2bool("DVC_IGNORE_ISATTY"):
            assert captured.err == ""
        else:
            # TTY override requested: the bar is expected on stderr
            assert "0/10" in captured.err
Esempio n. 12
0
def is_enabled():
    """Return True when analytics collection is enabled.

    Always disabled under DVC test runs; otherwise controlled by the
    analytics option in the core config section (default: enabled).
    """
    if env2bool("DVC_TEST"):
        return False

    core_section = Config(validate=False).config.get(Config.SECTION_CORE, {})
    enabled = to_bool(core_section.get(Config.SECTION_CORE_ANALYTICS, "true"))

    logger.debug("Analytics is {}abled.".format("en" if enabled else "dis"))

    return enabled
Esempio n. 13
0
    def __init__(self, repo):
        """Gate experiments behind DVC test mode or the feature flag."""
        from dvc.lock import make_lock

        experiments_on = env2bool("DVC_TEST") or repo.config["core"].get(
            "experiments", False
        )
        if not experiments_on:
            raise NotImplementedError

        self.repo = repo
        lock_path = os.path.join(self.repo.tmp_dir, "exp_scm_lock")
        self.scm_lock = make_lock(lock_path, tmp_dir=self.repo.tmp_dir)
Esempio n. 14
0
    def is_enabled(cmd=None):
        """Return True when analytics collection is enabled for *cmd*."""
        from dvc.config import Config, to_bool
        from dvc.command.daemon import CmdDaemonBase

        # never collect analytics during tests or from daemon commands
        if env2bool("DVC_TEST") or isinstance(cmd, CmdDaemonBase):
            return False

        core = Config(validate=False).config.get(Config.SECTION_CORE, {})
        enabled = to_bool(core.get(Config.SECTION_CORE_ANALYTICS, "true"))
        logger.debug(
            "Analytics is {}.".format("enabled" if enabled else "disabled"))
        return enabled
Esempio n. 15
0
    def should_test():
        """Return True if SSH remote tests should run.

        `DVC_TEST_SSH` (when set) overrides everything; otherwise probe a
        passwordless SSH connection to localhost.
        """
        override = env2bool("DVC_TEST_SSH", undefined=None)
        if override is not None:
            return override

        # FIXME: enable on windows
        if os.name == "nt":
            return False

        try:
            check_output(["ssh", "-o", "BatchMode=yes", "127.0.0.1", "ls"])
        except (CalledProcessError, OSError):
            return False
        else:
            return True
Esempio n. 16
0
def is_enabled():
    """Return True when analytics collection is enabled.

    Disabled under DVC test runs or when the DVC_NO_ANALYTICS env var is
    set; otherwise controlled by `core.analytics` (default: enabled).
    """
    from dvc.config import Config, to_bool
    from dvc.utils import env2bool

    if env2bool("DVC_TEST"):
        return False

    enabled = not os.getenv(DVC_NO_ANALYTICS)
    if enabled:
        conf_val = Config(validate=False).get("core", {}).get(
            "analytics", "true"
        )
        enabled = to_bool(conf_val)

    logger.debug("Analytics is {}abled.".format("en" if enabled else "dis"))

    return enabled
Esempio n. 17
0
    def is_enabled(cmd=None):
        """Return True when analytics collection is enabled for *cmd*."""
        from dvc.command.daemon import CmdDaemonBase

        # never collect analytics during tests or from daemon commands
        if env2bool("DVC_TEST") or isinstance(cmd, CmdDaemonBase):
            return False

        # prefer the command's own config when it carries one
        if cmd is None or not hasattr(cmd, "config"):
            config = Analytics._get_current_config()
        else:
            config = cmd.config

        assert config is not None

        enabled = Analytics._is_enabled_config(config)
        logger.debug(
            "Analytics is {}.".format("enabled" if enabled else "disabled"))
        return enabled
Esempio n. 18
0
    def should_test():
        """Return True if GCP remote tests should run.

        `DVC_TEST_GCP` (when set) overrides everything; otherwise require
        the creds file and a successful gcloud service-account activation.
        """
        override = env2bool("DVC_TEST_GCP", undefined=None)
        if override is not None:
            return override

        if not os.path.exists(TEST_GCP_CREDS_FILE):
            return False

        activate_cmd = [
            "gcloud",
            "auth",
            "activate-service-account",
            "--key-file",
            TEST_GCP_CREDS_FILE,
        ]
        try:
            check_output(activate_cmd)
        except (CalledProcessError, OSError):
            return False
        return True
Esempio n. 19
0
    def checkpoint_callback(
        cls,
        dvc: "Repo",
        scm: "Git",
        name: Optional[str],
        force: bool,
        unchanged: Iterable["PipelineStage"],
        stages: Iterable["PipelineStage"],
    ):
        """Commit one checkpoint iteration (changed + unchanged stages).

        Optionally auto-pushes the result when DVC_EXP_AUTO_PUSH is set;
        an unchanged experiment is silently skipped.
        """
        try:
            all_stages = list(stages) + list(unchanged)
            exp_hash = cls.hash_exp(all_stages)
            exp_rev = cls.commit(
                scm, exp_hash, exp_name=name, force=force, checkpoint=True
            )

            if env2bool(DVC_EXP_AUTO_PUSH):
                cls._auto_push(dvc, scm, os.getenv(DVC_EXP_GIT_REMOTE))
            logger.info("Checkpoint experiment iteration '%s'.", exp_rev[:7])
        except UnchangedExperimentError:
            pass
Esempio n. 20
0
    def reproduce(
        cls,
        info: "ExecutorInfo",
        rev: str,
        queue: Optional["Queue"] = None,
        infofile: Optional[str] = None,
        log_errors: bool = True,
        log_level: Optional[int] = None,
        **kwargs,
    ) -> "ExecutorResult":
        """Run dvc repro and return the result.

        Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
            experiment hash (or None on error), exp_ref is the experiment ref,
            and force is a bool specifying whether or not this experiment
            should force overwrite any existing duplicates.
        """
        from dvc.repo.checkout import checkout as dvc_checkout
        from dvc.repo.reproduce import reproduce as dvc_reproduce
        from dvc.stage import PipelineStage

        # opt-in auto-push of experiment results, configured via env vars
        auto_push = env2bool(DVC_EXP_AUTO_PUSH)
        git_remote = os.getenv(DVC_EXP_GIT_REMOTE, None)

        unchanged = []

        # report (rev, pid) back to the parent process, if any
        if queue is not None:
            queue.put((rev, os.getpid()))
        if log_errors and log_level is not None:
            cls._set_log_level(log_level)

        def filter_pipeline(stages):
            # on_unchanged callback: collect pipeline stages that repro
            # skipped as unchanged
            unchanged.extend(
                [stage for stage in stages if isinstance(stage, PipelineStage)]
            )

        exp_hash: Optional[str] = None
        exp_ref: Optional["ExpRefInfo"] = None
        # BUG FIX: `ref` was previously assigned only inside the
        # `if not repro_dry` branch below, so a dry run crashed with
        # UnboundLocalError at `info.result_ref = ref`. Initialize it here.
        ref: Optional[str] = None
        repro_force: bool = False

        if infofile is not None:
            info.dump_json(infofile)

        with cls._repro_dvc(
            info,
            log_errors=log_errors,
            **kwargs,
        ) as dvc:
            if auto_push:
                cls._validate_remotes(dvc, git_remote)

            args, kwargs = cls._repro_args(dvc)
            if args:
                targets: Optional[Union[list, str]] = args[0]
            else:
                targets = kwargs.get("targets")

            repro_force = kwargs.get("force", False)
            logger.trace(  # type: ignore[attr-defined]
                "Executor repro with force = '%s'", str(repro_force)
            )

            repro_dry = kwargs.get("dry")

            # NOTE: checkpoint outs are handled as a special type of persist
            # out:
            #
            # - checkpoint out may not yet exist if this is the first time this
            #   experiment has been run, this is not an error condition for
            #   experiments
            # - if experiment was run with --reset, the checkpoint out will be
            #   removed at the start of the experiment (regardless of any
            #   dvc.lock entry for the checkpoint out)
            # - if run without --reset, the checkpoint out will be checked out
            #   using any hash present in dvc.lock (or removed if no entry
            #   exists in dvc.lock)
            checkpoint_reset: bool = kwargs.pop("reset", False)
            if not repro_dry:
                dvc_checkout(
                    dvc,
                    targets=targets,
                    with_deps=targets is not None,
                    force=True,
                    quiet=True,
                    allow_missing=True,
                    checkpoint_reset=checkpoint_reset,
                    recursive=kwargs.get("recursive", False),
                )

            checkpoint_func = partial(
                cls.checkpoint_callback,
                dvc,
                dvc.scm,
                info.name,
                repro_force or checkpoint_reset,
            )
            stages = dvc_reproduce(
                dvc,
                *args,
                on_unchanged=filter_pipeline,
                checkpoint_func=checkpoint_func,
                **kwargs,
            )

            exp_hash = cls.hash_exp(stages)
            if not repro_dry:
                ref, exp_ref, repro_force = cls._repro_commit(
                    dvc,
                    info,
                    stages,
                    exp_hash,
                    checkpoint_reset,
                    auto_push,
                    git_remote,
                    repro_force,
                )
            info.result_hash = exp_hash
            info.result_ref = ref
            info.result_force = repro_force

        if infofile is not None:
            info.dump_json(infofile)

        # ideally we would return stages here like a normal repro() call, but
        # stages is not currently picklable and cannot be returned across
        # multiprocessing calls
        return ExecutorResult(exp_hash, exp_ref, repro_force)
Esempio n. 21
0
    def check(self):
        """Run the update check unless in CI or a DVC test run."""
        if not (os.getenv("CI") or env2bool("DVC_TEST")):
            self._with_lock(self._check, "checking")
Esempio n. 22
0
    def __init__(self, repo):
        """Gate experiments behind DVC test mode or the feature flag."""
        experiments_on = env2bool("DVC_TEST") or repo.config["core"].get(
            "experiments", False
        )
        if not experiments_on:
            raise NotImplementedError

        self.repo = repo
Esempio n. 23
0
    def reproduce(
        cls,
        dvc_dir: Optional[str],
        rev: str,
        queue: Optional["Queue"] = None,
        rel_cwd: Optional[str] = None,
        name: Optional[str] = None,
        log_errors: bool = True,
        log_level: Optional[int] = None,
        **kwargs,
    ) -> "ExecutorResult":
        """Run dvc repro and return the result.

        Returns tuple of (exp_hash, exp_ref, force) where exp_hash is the
            experiment hash (or None on error), exp_ref is the experiment ref,
            and force is a bool specifying whether or not this experiment
            should force overwrite any existing duplicates.
        """
        from dvc.repo.checkout import checkout as dvc_checkout
        from dvc.repo.reproduce import reproduce as dvc_reproduce

        auto_push = env2bool(DVC_EXP_AUTO_PUSH)
        git_remote = os.getenv(DVC_EXP_GIT_REMOTE, None)

        unchanged = []

        if queue is not None:
            queue.put((rev, os.getpid()))
        if log_errors and log_level is not None:
            cls._set_log_level(log_level)

        def filter_pipeline(stages):
            unchanged.extend([
                stage for stage in stages if isinstance(stage, PipelineStage)
            ])

        exp_hash: Optional[str] = None
        exp_ref: Optional["ExpRefInfo"] = None
        repro_force: bool = False

        with cls._repro_dvc(
                dvc_dir,
                rel_cwd,
                log_errors,
                **kwargs,
        ) as dvc:
            if auto_push:
                cls._validate_remotes(dvc, git_remote)

            args, kwargs = cls._repro_args(dvc)
            if args:
                targets: Optional[Union[list, str]] = args[0]
            else:
                targets = kwargs.get("targets")

            repro_force = kwargs.get("force", False)
            logger.trace(  # type: ignore[attr-defined]
                "Executor repro with force = '%s'", str(repro_force))

            repro_dry = kwargs.get("dry")

            # NOTE: checkpoint outs are handled as a special type of persist
            # out:
            #
            # - checkpoint out may not yet exist if this is the first time this
            #   experiment has been run, this is not an error condition for
            #   experiments
            # - if experiment was run with --reset, the checkpoint out will be
            #   removed at the start of the experiment (regardless of any
            #   dvc.lock entry for the checkpoint out)
            # - if run without --reset, the checkpoint out will be checked out
            #   using any hash present in dvc.lock (or removed if no entry
            #   exists in dvc.lock)
            checkpoint_reset: bool = kwargs.pop("reset", False)
            if not repro_dry:
                dvc_checkout(
                    dvc,
                    targets=targets,
                    with_deps=targets is not None,
                    force=True,
                    quiet=True,
                    allow_missing=True,
                    checkpoint_reset=checkpoint_reset,
                    recursive=kwargs.get("recursive", False),
                )

            checkpoint_func = partial(
                cls.checkpoint_callback,
                dvc,
                dvc.scm,
                name,
                repro_force or checkpoint_reset,
            )
            stages = dvc_reproduce(
                dvc,
                *args,
                on_unchanged=filter_pipeline,
                checkpoint_func=checkpoint_func,
                **kwargs,
            )

            exp_hash = cls.hash_exp(stages)
            if not repro_dry:
                try:
                    is_checkpoint = any(stage.is_checkpoint
                                        for stage in stages)
                    if is_checkpoint and checkpoint_reset:
                        # For reset checkpoint stages, we need to force
                        # overwriting existing checkpoint refs even though
                        # repro may not have actually been run with --force
                        repro_force = True
                    cls.commit(
                        dvc.scm,
                        exp_hash,
                        exp_name=name,
                        force=repro_force,
                        checkpoint=is_checkpoint,
                    )
                    if auto_push:
                        cls._auto_push(dvc, dvc.scm, git_remote)
                except UnchangedExperimentError:
                    pass
                ref = dvc.scm.get_ref(EXEC_BRANCH, follow=False)
                if ref:
                    exp_ref = ExpRefInfo.from_ref(ref)
                if cls.WARN_UNTRACKED:
                    untracked = dvc.scm.untracked_files()
                    if untracked:
                        logger.warning(
                            "The following untracked files were present in "
                            "the experiment directory after reproduction but "
                            "will not be included in experiment commits:\n"
                            "\t%s",
                            ", ".join(untracked),
                        )

        # ideally we would return stages here like a normal repro() call, but
        # stages is not currently picklable and cannot be returned across
        # multiprocessing calls
        return ExecutorResult(exp_hash, exp_ref, repro_force)
Esempio n. 24
0
    def __init__(
        self,
        root_dir=None,
        scm=None,
        rev=None,
        subrepos=False,
        uninitialized=False,
        config=None,
        url=None,
        repo_factory=None,
    ):
        """Initialize a DVC repository rooted at *root_dir*.

        When *scm* and/or *rev* are given, the repo is backed by an SCM
        filesystem at that revision (read-only: no-op lock/state below);
        otherwise it operates on the local filesystem.
        """
        from dvc.config import Config
        from dvc.data_cloud import DataCloud
        from dvc.fs.local import LocalFileSystem
        from dvc.lock import LockNoop, make_lock
        from dvc.machine import MachineManager
        from dvc.objects.db import ODBManager
        from dvc.repo.live import Live
        from dvc.repo.metrics import Metrics
        from dvc.repo.params import Params
        from dvc.repo.plots import Plots
        from dvc.repo.stage import StageLoad
        from dvc.scm import SCM
        from dvc.stage.cache import StageCache
        from dvc.state import State, StateNoop

        self.url = url
        self._fs_conf = {"repo_factory": repo_factory}

        # a rev without an scm object: open the SCM ourselves
        if rev and not scm:
            scm = SCM(root_dir or os.curdir)

        self.root_dir, self.dvc_dir, self.tmp_dir = self._get_repo_dirs(
            root_dir=root_dir, scm=scm, rev=rev, uninitialized=uninitialized
        )

        # scm-backed repos read files from the given revision; otherwise
        # use the local filesystem directly
        if scm:
            self._fs = scm.get_fs(rev)
        else:
            self._fs = LocalFileSystem(url=self.root_dir)

        self.config = Config(self.dvc_dir, fs=self.fs, config=config)
        self._uninitialized = uninitialized
        self._scm = scm

        # used by RepoFileSystem to determine if it should traverse subrepos
        self.subrepos = subrepos

        self.cloud = DataCloud(self)
        self.stage = StageLoad(self)

        # scm-backed or dvc-dir-less repos get no-op lock/state (nothing
        # on disk to protect); real repos get a file lock and state db
        if scm or not self.dvc_dir:
            self.lock = LockNoop()
            self.state = StateNoop()
            self.odb = ODBManager(self)
        else:
            self.lock = make_lock(
                os.path.join(self.tmp_dir, "lock"),
                tmp_dir=self.tmp_dir,
                hardlink_lock=self.config["core"].get("hardlink_lock", False),
                friendly=True,
            )

            # NOTE: storing state and link_state in the repository itself to
            # avoid any possible state corruption in 'shared cache dir'
            # scenario.
            self.state = State(self.root_dir, self.tmp_dir, self.dvcignore)
            self.odb = ODBManager(self)

            self.stage_cache = StageCache(self)

            self._ignore()

        self.metrics = Metrics(self)
        self.plots = Plots(self)
        self.params = Params(self)
        self.live = Live(self)

        # machines require a tmp dir and the feature flag (or test mode)
        if self.tmp_dir and (
            self.config["feature"].get("machine", False)
            or env2bool("DVC_TEST")
        ):
            self.machine = MachineManager(self)
        else:
            self.machine = None

        # optional hook invoked when stage collection fails for a path
        self.stage_collection_error_handler: Optional[
            Callable[[str, Exception], None]
        ] = None
        # reentrancy counter for the repo lock
        self._lock_depth = 0