Example #1
0
    def _run(self):
        """Execute the stage command in a shell and fail on a bad exit code.

        While the child process is running, SIGINT is ignored in this
        process (only when called from the main thread); the previous
        handler is put back afterwards.

        Raises:
            StageCmdFailedError: if the command did not exit with code 0.
        """
        self._check_missing_deps()

        # On *nix use the shell named by $SHELL; on Windows pass None so
        # that Popen falls back to its default.
        shell_path = os.getenv("SHELL") if os.name != "nt" else None
        self._warn_if_fish(shell_path)

        in_main_thread = isinstance(
            threading.current_thread(), threading._MainThread
        )
        if in_main_thread:
            previous = signal.signal(signal.SIGINT, signal.SIG_IGN)

        proc = None
        try:
            proc = subprocess.Popen(
                self.cmd,
                cwd=self.wdir,
                shell=True,
                env=fix_env(os.environ),
                executable=shell_path,
                close_fds=True,
            )
            proc.communicate()
        finally:
            if in_main_thread:
                signal.signal(signal.SIGINT, previous)

        if proc is None or proc.returncode != 0:
            raise StageCmdFailedError(self)
Example #2
0
def daemon(args):
    """Spawn a `dvc daemon` command as a detached process.

    Args:
        args (list): arguments appended after `dvc daemon -q`.

    Raises:
        NotImplementedError: if `os.name` is neither "nt" nor "posix".
    """
    # A binary build is invoked directly; otherwise go through `-m dvc`.
    prefix = [sys.executable]
    if not is_binary():
        prefix = prefix + ["-m", "dvc"]
    cmd = prefix + ["daemon", "-q"] + args

    env = fix_env()
    # PYTHONPATH is set to the directory two levels above this source file.
    this_file = os.path.abspath(inspect.stack()[0][1])
    parent_dir = os.path.dirname(os.path.dirname(this_file))
    env[cast_bytes_py2("PYTHONPATH")] = cast_bytes_py2(parent_dir)

    logger.debug("Trying to spawn '{}' with env '{}'".format(cmd, env))

    if os.name not in ("nt", "posix"):
        raise NotImplementedError
    spawn = _spawn_windows if os.name == "nt" else _spawn_posix
    spawn(cmd, env)

    logger.debug("Spawned '{}'".format(cmd))
Example #3
0
    def run(self, dry=False):
        """Run the stage and, unless `dry` is set, save it afterwards.

        What "running" means depends on the stage:
          * locked stage: missing outputs are checked;
          * import stage: the first dependency is downloaded to the first
            output;
          * data source: missing outputs are checked;
          * anything else: the stage command is executed in a shell.

        Args:
            dry (bool): when True only the log message is emitted; nothing
                is checked, downloaded, executed or saved.

        Raises:
            StageCmdFailedError: if the stage command exits with a non-zero
                return code.
        """
        if self.locked:
            msg = u'Verifying outputs in locked stage \'{}\''
            self.project.logger.info(msg.format(self.relpath))
            if not dry:
                self.check_missing_outputs()
        elif self.is_import:
            msg = u'Importing \'{}\' -> \'{}\''
            self.project.logger.info(
                msg.format(self.deps[0].path, self.outs[0].path))

            if not dry:
                self.deps[0].download(self.outs[0].path_info)
        elif self.is_data_source:
            msg = u'Verifying data sources in \'{}\''.format(self.relpath)
            self.project.logger.info(msg)
            if not dry:
                self.check_missing_outputs()
        else:
            msg = u'Running command:\n\t{}'.format(self.cmd)
            self.project.logger.info(msg)

            if not dry:
                self._check_missing_deps()
                # $SHELL is a *nix convention; on Windows pass None so that
                # Popen uses its default shell instead of whatever a
                # POSIX-like environment may have exported in SHELL.
                executable = os.getenv('SHELL') if os.name != 'nt' else None
                p = subprocess.Popen(self.cmd,
                                     cwd=self.cwd,
                                     shell=True,
                                     env=fix_env(os.environ),
                                     executable=executable)
                p.communicate()
                if p.returncode != 0:
                    raise StageCmdFailedError(self)

        if not dry:
            self.save()
Example #4
0
    def __init__(self, root_dir=os.curdir, search_parent_directories=True):
        """Open the git repository at `root_dir`.

        Relies on the `Repo` class from the `git` module (gitpython package).

        Args:
            root_dir: path handed to `git.Repo`.
            search_parent_directories: forwarded to `git.Repo` unchanged.

        Raises:
            SCMError: if gitpython raises `InvalidGitRepositoryError`.
        """
        super().__init__(root_dir)

        import git
        from git.exc import InvalidGitRepositoryError

        try:
            self.repo = git.Repo(
                root_dir, search_parent_directories=search_parent_directories
            )
        except InvalidGitRepositoryError:
            raise SCMError("{} is not a git repository".format(root_dir))

        # NOTE: fixing LD_LIBRARY_PATH for binary built by PyInstaller.
        # http://pyinstaller.readthedocs.io/en/stable/runtime-information.html
        fixed_env = fix_env(None)
        self.repo.git.update_environment(
            LD_LIBRARY_PATH=fixed_env.get("LD_LIBRARY_PATH")
        )

        self.ignored_paths = []
        self.files_to_track = set()
Example #5
0
def _spawn_posix(cmd):
    """Run `cmd` in a detached grandchild process (double fork).

    The caller returns right after the first fork. The intermediate child
    starts a new session, resets the umask, forks again and exits. The
    grandchild closes the standard streams, runs `cmd` with `fix_env()` as
    its environment, waits for it to finish and exits.

    Args:
        cmd: command handed to `Popen` with `shell=False`.
    """
    # NOTE: using os._exit instead of sys.exit, because dvc built
    # with PyInstaller has trouble with SystemExit exception and throws
    # errors such as "[26338] Failed to execute script __main__"
    try:
        pid = os.fork()
        if pid > 0:
            # Parent (the caller): nothing more to do here.
            return
    except OSError:
        # NOTE(review): a failed fork is raised in the calling process, so
        # this exits the caller itself - confirm that is intended.
        logger.error("failed at first fork")
        os._exit(1)  # pylint: disable=protected-access

    # First child: start a new session and reset the umask.
    os.setsid()
    os.umask(0)

    try:
        pid = os.fork()
        if pid > 0:
            # First child exits; only the grandchild continues below.
            os._exit(0)  # pylint: disable=protected-access
    except OSError:
        logger.error("failed at second fork")
        os._exit(1)  # pylint: disable=protected-access

    # Grandchild: drop the standard streams before running the command.
    sys.stdin.close()
    sys.stdout.close()
    sys.stderr.close()

    Popen(cmd, env=fix_env(), close_fds=True, shell=False).communicate()

    os._exit(0)  # pylint: disable=protected-access
Example #6
0
def test_fix_env_pyenv(path, orig):
    """Check the PATH returned by `fix_env` for an env with pyenv vars set.

    `path` is the PATH given to `fix_env` and `orig` is the PATH expected
    in its result.
    """
    env = {"PATH": path}
    env.update(
        {
            "PYENV_ROOT": "/pyenv",
            "PYENV_VERSION": "3.7.2",
            "PYENV_DIR": "/some/dir",
            "PYENV_HOOK_PATH": "/some/hook/path",
        }
    )
    fixed = fix_env(env)
    assert fixed["PATH"] == orig
Example #7
0
    def clone(
        url: str,
        to_path: str,
        rev: Optional[str] = None,
        shallow_branch: Optional[str] = None,
    ):
        """Clone `url` into `to_path` and optionally check out `rev`.

        When `shallow_branch` is given, only that branch is cloned, with a
        depth of 1.

        Raises:
            CloneError: if the clone fails with a `GitCommandError`.
            RevError: if checking out `rev` fails with a `GitCommandError`.
        """
        import git

        ld_key = "LD_LIBRARY_PATH"

        env = fix_env(None)
        if is_binary() and ld_key not in env:
            # In fix_env, we delete LD_LIBRARY_PATH key if it was empty before
            # PyInstaller modified it. GitPython, in git.Repo.clone_from, uses
            # env to update its own internal state. When there is no key in
            # env, this value is not updated and GitPython re-uses
            # LD_LIBRARY_PATH that has been set by PyInstaller.
            # See [1] for more info.
            # [1] https://github.com/gitpython-developers/GitPython/issues/924
            env[ld_key] = ""

        shallow = shallow_branch is not None
        try:
            if shallow and os.path.exists(url):
                # git disables --depth for local clones unless file:// url
                # scheme is used
                url = f"file://{url}"

            # Extra clone options are only passed for a shallow clone.
            extra = {"branch": shallow_branch, "depth": 1} if shallow else {}
            with TqdmGit(desc="Cloning", unit="obj") as pbar:
                tmp_repo = git.Repo.clone_from(
                    url,
                    to_path,
                    env=env,  # needed before we can fix it in __init__
                    no_single_branch=True,
                    progress=pbar.update_git,
                    **extra,
                )
            tmp_repo.close()
        except git.exc.GitCommandError as exc:  # pylint: disable=no-member
            raise CloneError(url, to_path) from exc

        # NOTE: using our wrapper to make sure that env is fixed in __init__
        repo = GitPythonBackend(to_path)

        if not rev:
            return

        try:
            repo.checkout(rev)
        except git.exc.GitCommandError as exc:  # pylint: disable=no-member
            msg = "failed to access revision '{}' for repo '{}'"
            raise RevError(msg.format(rev, url)) from exc
Example #8
0
def cmd_run(stage, *args, checkpoint_func=None, **kwargs):
    """Run every command of `stage` in a subprocess, one after another.

    While a command is running, SIGINT is ignored in this process (only
    when called from the main thread); the previous handler is restored
    once the command has finished.

    Args:
        stage: object providing `cmd` (a string or a list of strings) and
            `wdir`.
        *args: accepted but not used.
        checkpoint_func: when truthy, the values from
            `_checkpoint_env(stage)` are added to the environment of the
            command; it is also handed to `checkpoint_monitor`.
        **kwargs: accepted but not used; the Popen keyword arguments are
            built from scratch.

    Raises:
        CheckpointKilledError: if a command exits with a non-zero code and
            the `killed` event given to `checkpoint_monitor` is set.
        StageCmdFailedError: if a command exits with a non-zero code
            otherwise.
    """
    kwargs = {"cwd": stage.wdir, "env": fix_env(None), "close_fds": True}
    cmd = stage.cmd if isinstance(stage.cmd, list) else [stage.cmd]
    if checkpoint_func:
        # indicate that checkpoint cmd is being run inside DVC
        kwargs["env"].update(_checkpoint_env(stage))

    if os.name == "nt":
        kwargs["shell"] = True
        executable = None
    else:
        # NOTE: when you specify `shell=True`, `Popen` [1] will default to
        # `/bin/sh` on *nix and will add ["/bin/sh", "-c"] to your command.
        # But we actually want to run the same shell that we are running
        # from right now, which is usually determined by the `SHELL` env
        # var. So instead, we compose our command on our own, making sure
        # to include special flags to prevent shell from reading any
        # configs and modifying env, which may change the behavior or the
        # command we are running. See [2] for more info.
        #
        # [1] https://github.com/python/cpython/blob/3.7/Lib/subprocess.py
        #                                                            #L1426
        # [2] https://github.com/iterative/dvc/issues/2506
        #                                           #issuecomment-535396799
        kwargs["shell"] = False
        executable = os.getenv("SHELL") or "/bin/sh"
        warn_if_fish(executable)

    main_thread = isinstance(
        threading.current_thread(),
        threading._MainThread,  # pylint: disable=protected-access
    )
    for _cmd in cmd:
        logger.info("$ %s", _cmd)
        old_handler = None
        p = None

        try:
            p = subprocess.Popen(_make_cmd(executable, _cmd), **kwargs)
            if main_thread:
                old_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)

            killed = threading.Event()
            with checkpoint_monitor(stage, checkpoint_func, p, killed):
                p.communicate()
        finally:
            # Compare against None rather than truthiness: signal.SIG_DFL
            # has the integer value 0 and would otherwise never be
            # restored, leaving SIGINT ignored for the rest of the process.
            if old_handler is not None:
                signal.signal(signal.SIGINT, old_handler)

        retcode = None if not p else p.returncode
        if retcode != 0:
            if killed.is_set():
                raise CheckpointKilledError(_cmd, retcode)
            raise StageCmdFailedError(_cmd, retcode)
Example #9
0
    def _run(self):
        """Execute the stage command in a shell.

        Raises:
            StageCmdFailedError: if the command did not exit with code 0.
        """
        self._check_missing_deps()

        # On *nix use the shell named by $SHELL; on Windows pass None so
        # that Popen falls back to its default.
        shell_path = os.getenv('SHELL') if os.name != 'nt' else None
        self._check_if_fish(shell_path)

        proc = subprocess.Popen(
            self.cmd,
            cwd=self.cwd,
            shell=True,
            env=fix_env(os.environ),
            executable=shell_path,
        )
        proc.communicate()

        if proc.returncode == 0:
            return
        raise StageCmdFailedError(self)
Example #10
0
def _spawn_windows(cmd):
    """Start `cmd` on Windows and wait for it to finish.

    The process is created with the CREATE_NEW_PROCESS_GROUP and
    DETACHED_PROCESS creation flags, with STARTF_USESHOWWINDOW set in its
    startup info, and with `fix_env()` as its environment.
    """
    from subprocess import STARTUPINFO, STARTF_USESHOWWINDOW

    flags = CREATE_NEW_PROCESS_GROUP | DETACHED_PROCESS

    info = STARTUPINFO()
    info.dwFlags |= STARTF_USESHOWWINDOW

    proc = Popen(
        cmd,
        env=fix_env(),
        close_fds=True,
        shell=False,
        creationflags=flags,
        startupinfo=info,
    )
    proc.communicate()
Example #11
0
    def __init__(self, root_dir=os.curdir):
        """Open the git repository located at `root_dir`.

        Raises:
            SCMError: if gitpython raises `InvalidGitRepositoryError`.
        """
        super(Git, self).__init__(root_dir)

        import git
        from git.exc import InvalidGitRepositoryError

        try:
            self.repo = git.Repo(root_dir)
        except InvalidGitRepositoryError:
            msg = '{} is not a git repository'
            raise SCMError(msg.format(root_dir))

        # NOTE: fixing LD_LIBRARY_PATH for binary built by PyInstaller.
        # http://pyinstaller.readthedocs.io/en/stable/runtime-information.html
        fixed_env = fix_env(None)
        self.repo.git.update_environment(
            LD_LIBRARY_PATH=fixed_env.get('LD_LIBRARY_PATH')
        )
Example #12
0
    def _run(self):
        """Execute the stage command and fail on a non-zero exit code.

        On Windows the command is run through the default shell. Elsewhere
        it is run as `<shell> [flags] -c <cmd>`, where `<shell>` is taken
        from $SHELL (falling back to /bin/sh).

        While the command is running, SIGINT is ignored in this process
        (only when called from the main thread); the previous handler is
        restored afterwards.

        Raises:
            StageCmdFailedError: if the command did not exit with code 0.
        """
        self._check_missing_deps()

        kwargs = {"cwd": self.wdir, "env": fix_env(None), "close_fds": True}

        if os.name == "nt":
            kwargs["shell"] = True
            cmd = self.cmd
        else:
            # NOTE: when you specify `shell=True`, `Popen` [1] will default to
            # `/bin/sh` on *nix and will add ["/bin/sh", "-c"] to your command.
            # But we actually want to run the same shell that we are running
            # from right now, which is usually determined by the `SHELL` env
            # var. So instead, we compose our command on our own, making sure
            # to include special flags to prevent shell from reading any
            # configs and modifying env, which may change the behavior or the
            # command we are running. See [2] for more info.
            #
            # [1] https://github.com/python/cpython/blob/3.7/Lib/subprocess.py
            #                                                            #L1426
            # [2] https://github.com/iterative/dvc/issues/2506
            #                                           #issuecomment-535396799
            kwargs["shell"] = False
            executable = os.getenv("SHELL") or "/bin/sh"

            self._warn_if_fish(executable)

            opts = {"zsh": ["--no-rcs"], "bash": ["--noprofile", "--norc"]}
            name = os.path.basename(executable).lower()
            cmd = [executable] + opts.get(name, []) + ["-c", self.cmd]

        main_thread = isinstance(
            threading.current_thread(), threading._MainThread
        )
        old_handler = None
        p = None

        try:
            p = subprocess.Popen(cmd, **kwargs)
            if main_thread:
                old_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
            p.communicate()
        finally:
            # Compare against None rather than truthiness: signal.SIG_DFL
            # has the integer value 0 and would otherwise never be
            # restored, leaving SIGINT ignored for the rest of the process.
            if old_handler is not None:
                signal.signal(signal.SIGINT, old_handler)

        if (p is None) or (p.returncode != 0):
            raise StageCmdFailedError(self)
Example #13
0
 def hadoop_fs(self, cmd, user=None):
     """Run `hadoop fs -<cmd>` in a shell and return its standard output.

     Args:
         cmd: text appended to `hadoop fs -`.
         user: when truthy, the command is prefixed with
             `HADOOP_USER_NAME=<user> `.

     Returns:
         The standard output of the command decoded as UTF-8.

     Raises:
         DvcException: if the command exits with a non-zero code.
     """
     cmd = 'hadoop fs -' + cmd
     if user:
         cmd = 'HADOOP_USER_NAME={} '.format(user) + cmd

     on_windows = os.name == 'nt'

     # NOTE: close_fds doesn't work with redirected stdin/stdout/stderr
     # on Windows. See https://github.com/iterative/dvc/issues/1197.
     close_fds = not on_windows

     # $SHELL is a *nix convention; on Windows let Popen use its default.
     executable = None if on_windows else os.getenv('SHELL')

     p = Popen(cmd,
               shell=True,
               close_fds=close_fds,
               executable=executable,
               env=fix_env(os.environ),
               stdin=PIPE,
               stdout=PIPE,
               stderr=PIPE)
     out, err = p.communicate()
     if p.returncode != 0:
         raise DvcException('HDFS command failed: {}: {}'.format(cmd, err))
     return out.decode('utf-8')
Example #14
0
    def _checksum(self, path_info, **kwargs):
        """Return the checksum reported for `path_info.url`, or None.

        None is returned when the command yields None or when its output
        does not match CHECKSUM_REGEX. `**kwargs` is accepted but not used.
        """
        # PyArrow doesn't natively support retrieving the
        # checksum, so we have to use hadoop fs
        output = self._run_command(
            f"checksum {path_info.url}",
            env=fix_env(os.environ),
            user=path_info.user,
        )
        found = None if output is None else CHECKSUM_REGEX.match(output)
        return None if found is None else found.group("checksum")
Example #15
0
File: daemon.py Project: ush98/dvc
def daemon(args):
    """Spawn a `dvc daemon` command as a detached process.

    Nothing is spawned when the DVC_DAEMON environment variable is already
    set to a non-empty value.

    Args:
        args (list): arguments appended after `daemon -q`.
    """
    already_in_daemon = os.environ.get(DVC_DAEMON)
    if already_in_daemon:
        logger.debug("skipping launching a new daemon.")
        return

    cmd = ["daemon", "-q"] + args

    env = fix_env()
    # PYTHONPATH is set to the directory two levels above this source file.
    this_file = os.path.abspath(inspect.stack()[0][1])
    env["PYTHONPATH"] = os.path.dirname(os.path.dirname(this_file))
    # Mark the environment so that the spawned process sees DVC_DAEMON set.
    env[DVC_DAEMON] = "1"

    _spawn(cmd, env)
Example #16
0
    def _checksum(self, path, **kwargs):
        """Return the checksum reported for `path`, or None.

        None is returned when the command yields None or when its output
        does not match CHECKSUM_REGEX. `**kwargs` is accepted but not used.
        """
        # PyArrow doesn't natively support retrieving the
        # checksum, so we have to use hadoop fs
        target = self.unstrip_protocol(path)

        output = self._run_command(
            f"checksum {target}",
            env=fix_env(os.environ),
            user=self.fs_args["user"],
        )
        found = None if output is None else CHECKSUM_REGEX.match(output)
        return None if found is None else found.group("checksum")
Example #17
0
    def clone(url, to_path, rev=None):
        """Clone `url` into `to_path` and return it wrapped in `Git`.

        When `rev` is truthy it is checked out in the new clone.

        Raises:
            CloneError: if the clone fails with a `GitCommandError`.
            RevError: if checking out `rev` fails with a `GitCommandError`.
        """
        import git

        ld_key = "LD_LIBRARY_PATH"

        env = fix_env(None)
        if is_binary() and ld_key not in env:
            # In fix_env, we delete LD_LIBRARY_PATH key if it was empty before
            # PyInstaller modified it. GitPython, in git.Repo.clone_from, uses
            # env to update its own internal state. When there is no key in
            # env, this value is not updated and GitPython re-uses
            # LD_LIBRARY_PATH that has been set by PyInstaller.
            # See [1] for more info.
            # [1] https://github.com/gitpython-developers/GitPython/issues/924
            env[ld_key] = ""

        try:
            with TqdmGit(desc="Cloning", unit="obj") as pbar:
                tmp_repo = git.Repo.clone_from(
                    url,
                    to_path,
                    env=env,  # needed before we can fix it in __init__
                    no_single_branch=True,
                    progress=pbar.update_git,
                )
            tmp_repo.close()
        except git.exc.GitCommandError as exc:  # pylint: disable=no-member
            raise CloneError(url, to_path) from exc

        # NOTE: using our wrapper to make sure that env is fixed in __init__
        repo = Git(to_path)
        if not rev:
            return repo

        try:
            repo.checkout(rev)
        except git.exc.GitCommandError as exc:  # pylint: disable=no-member
            msg = "failed to access revision '{}' for repo '{}'"
            raise RevError(msg.format(rev, url)) from exc

        return repo
Example #18
0
    def __init__(  # pylint:disable=W0231
        self, root_dir=os.curdir, search_parent_directories=True
    ):
        """Open the git repository at `root_dir`.

        Args:
            root_dir: path handed to `git.Repo`.
            search_parent_directories: forwarded to `git.Repo` unchanged.

        Raises:
            SCMError: if gitpython raises `InvalidGitRepositoryError`.
        """
        import git
        from git.exc import InvalidGitRepositoryError

        try:
            self.repo = git.Repo(
                root_dir, search_parent_directories=search_parent_directories
            )
        except InvalidGitRepositoryError:
            raise SCMError("{} is not a git repository".format(root_dir))

        # NOTE: fixing LD_LIBRARY_PATH for binary built by PyInstaller.
        # http://pyinstaller.readthedocs.io/en/stable/runtime-information.html
        fixed_env = fix_env(None)
        self.repo.git.update_environment(
            LD_LIBRARY_PATH=fixed_env.get("LD_LIBRARY_PATH")
        )
Example #19
0
    def shell_command(self, cmd, user=None):
        """Run `cmd` in a shell and return its standard output as text.

        Args:
            cmd: command line handed to `Popen` with `shell=True`.
            user: accepted but not used here.

        Returns:
            The standard output of the command decoded as UTF-8.

        Raises:
            RemoteCmdError: if the command exits with a non-zero code.
        """
        on_windows = os.name == "nt"

        proc = Popen(
            cmd,
            shell=True,
            # NOTE: close_fds doesn't work with redirected
            # stdin/stdout/stderr.
            # See https://github.com/iterative/dvc/issues/1197.
            close_fds=not on_windows,
            executable=None if on_windows else os.getenv("SHELL"),
            env=fix_env(os.environ),
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
        )
        stdout, stderr = proc.communicate()
        if proc.returncode != 0:
            raise RemoteCmdError(self.scheme, cmd, proc.returncode, stderr)
        return stdout.decode("utf-8")
Example #20
0
    def __init__(self, root_dir=os.curdir, repo=None):
        """Open the git repository located at `root_dir`.

        Args:
            root_dir: path handed to `git.Repo`.
            repo: forwarded to the parent class constructor.

        Raises:
            SCMError: if gitpython raises `InvalidGitRepositoryError`.
        """
        super(Git, self).__init__(root_dir, repo=repo)

        import git
        from git.exc import InvalidGitRepositoryError

        try:
            self.git = git.Repo(root_dir)
        except InvalidGitRepositoryError:
            raise SCMError("{} is not a git repository".format(root_dir))

        # NOTE: fixing LD_LIBRARY_PATH for binary built by PyInstaller.
        # http://pyinstaller.readthedocs.io/en/stable/runtime-information.html
        fixed_env = fix_env(None)
        self.git.git.update_environment(
            LD_LIBRARY_PATH=fixed_env.get("LD_LIBRARY_PATH")
        )

        self.ignored_paths = []
        self.files_to_track = []
Example #21
0
def prepare_kwargs(stage, checkpoint_func=None):
    """Build the keyword arguments used to run a command of `stage`.

    Returns:
        dict with the keys `cwd` (the stage's `wdir`), `env` (the result of
        `fix_env(None)` updated with `stage.env(...)`), `close_fds` (True)
        and `shell` (True only on Windows).
    """
    workdir = stage.wdir
    environ = fix_env(None)
    environ.update(stage.env(checkpoint_func=checkpoint_func))

    # NOTE: when you specify `shell=True`, `Popen` [1] will default to
    # `/bin/sh` on *nix and will add ["/bin/sh", "-c"] to your command.
    # But we actually want to run the same shell that we are running
    # from right now, which is usually determined by the `SHELL` env
    # var. So instead, we compose our command on our own, making sure
    # to include special flags to prevent shell from reading any
    # configs and modifying env, which may change the behavior or the
    # command we are running. See [2] for more info.
    #
    # [1] https://github.com/python/cpython/blob/3.7/Lib/subprocess.py
    #                                                            #L1426
    # [2] https://github.com/iterative/dvc/issues/2506
    #                                           #issuecomment-535396799
    return {
        "cwd": workdir,
        "env": environ,
        "close_fds": True,
        "shell": os.name == "nt",
    }
Example #22
0
def cmd_run(stage, *args, **kwargs):
    """Run the command of `stage` in a subprocess.

    On Windows the command is run through the default shell. Elsewhere the
    command line is built by `_nix_cmd` from the shell named by $SHELL
    (falling back to /bin/sh) and the stage command.

    While the command is running, SIGINT is ignored in this process (only
    when called from the main thread); the previous handler is restored
    afterwards.

    Args:
        stage: object providing `cmd` and `wdir`.
        *args: accepted but not used.
        **kwargs: accepted but not used; the Popen keyword arguments are
            built from scratch.

    Raises:
        StageCmdFailedError: if the command did not exit with code 0.
    """
    kwargs = {"cwd": stage.wdir, "env": fix_env(None), "close_fds": True}

    if os.name == "nt":
        kwargs["shell"] = True
        cmd = stage.cmd
    else:
        # NOTE: when you specify `shell=True`, `Popen` [1] will default to
        # `/bin/sh` on *nix and will add ["/bin/sh", "-c"] to your command.
        # But we actually want to run the same shell that we are running
        # from right now, which is usually determined by the `SHELL` env
        # var. So instead, we compose our command on our own, making sure
        # to include special flags to prevent shell from reading any
        # configs and modifying env, which may change the behavior or the
        # command we are running. See [2] for more info.
        #
        # [1] https://github.com/python/cpython/blob/3.7/Lib/subprocess.py
        #                                                            #L1426
        # [2] https://github.com/iterative/dvc/issues/2506
        #                                           #issuecomment-535396799
        kwargs["shell"] = False
        executable = os.getenv("SHELL") or "/bin/sh"
        warn_if_fish(executable)
        cmd = _nix_cmd(executable, stage.cmd)

    main_thread = isinstance(threading.current_thread(), threading._MainThread)
    old_handler = None
    p = None

    try:
        p = subprocess.Popen(cmd, **kwargs)
        if main_thread:
            old_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
        p.communicate()
    finally:
        # Compare against None rather than truthiness: signal.SIG_DFL has
        # the integer value 0 and would otherwise never be restored,
        # leaving SIGINT ignored for the rest of the process.
        if old_handler is not None:
            signal.signal(signal.SIGINT, old_handler)

    retcode = None if not p else p.returncode
    if retcode != 0:
        raise StageCmdFailedError(stage.cmd, retcode)
Example #23
0
File: hdfs.py Project: hochm/dvc
    def hadoop_fs(self, cmd, user=None):
        """Run `hadoop fs -<cmd>` in a shell and return its standard output.

        Args:
            cmd: text appended to `hadoop fs -`.
            user: when truthy, the command is prefixed with
                `HADOOP_USER_NAME=<user> `.

        Returns:
            The standard output of the command decoded as UTF-8.

        Raises:
            DvcException: if the command exits with a non-zero code.
        """
        cmd = 'hadoop fs -' + cmd
        if user:
            cmd = 'HADOOP_USER_NAME={} '.format(user) + cmd

        on_windows = (os.name == 'nt')

        # NOTE: close_fds doesn't work with redirected stdin/stdout/stderr.
        # See https://github.com/iterative/dvc/issues/1197.
        close_fds = not on_windows

        # $SHELL is a *nix convention; on Windows pass None so that Popen
        # uses its default shell.
        executable = None if on_windows else os.getenv('SHELL')

        p = Popen(cmd,
                  shell=True,
                  close_fds=close_fds,
                  executable=executable,
                  env=fix_env(os.environ),
                  stdin=PIPE,
                  stdout=PIPE,
                  stderr=PIPE)
        out, err = p.communicate()
        if p.returncode != 0:
            raise DvcException('HDFS command failed: {}: {}'.format(cmd, err))
        return out.decode('utf-8')
Example #24
0
def daemon(args):
    """Spawn a `dvc daemon` command as a detached process.

    Nothing is spawned when the DVC_DAEMON environment variable is already
    set to a non-empty value.

    Args:
        args (list): arguments appended after `daemon -q`.
    """
    already_in_daemon = os.environ.get(DVC_DAEMON)
    if already_in_daemon:
        logger.debug("skipping launching a new daemon.")
        return

    # A binary build is invoked directly; otherwise the interpreter is
    # given the script found in sys.argv[0].
    prefix = [sys.executable]
    if not is_binary():
        prefix = prefix + [sys.argv[0]]
    cmd = prefix + ["daemon", "-q"] + args

    env = fix_env()
    # PYTHONPATH is set to the directory two levels above this source file.
    this_file = os.path.abspath(inspect.stack()[0][1])
    parent_dir = os.path.dirname(os.path.dirname(this_file))
    env[cast_bytes_py2("PYTHONPATH")] = cast_bytes_py2(parent_dir)
    # Mark the environment so that the spawned process sees DVC_DAEMON set.
    env[cast_bytes_py2(DVC_DAEMON)] = cast_bytes_py2("1")

    _spawn(cmd, env)
Example #25
0
    def clone(url, to_path, rev=None):
        """Clone `url` into `to_path` and return it wrapped in `Git`.

        When `rev` is truthy it is checked out in the new clone.

        Raises:
            CloneError: if the clone fails with a `GitCommandError`.
            RevError: if checking out `rev` fails with a `GitCommandError`.
        """
        import git

        ld_key = "LD_LIBRARY_PATH"

        env = fix_env(None)
        if is_binary() and ld_key not in env:
            # In fix_env, we delete LD_LIBRARY_PATH key if it was empty before
            # PyInstaller modified it. GitPython, in git.Repo.clone_from, uses
            # env to update its own internal state. When there is no key in
            # env, this value is not updated and GitPython re-uses
            # LD_LIBRARY_PATH that has been set by PyInstaller.
            # See [1] for more info.
            # [1] https://github.com/gitpython-developers/GitPython/issues/924
            env[cast_bytes_py2(ld_key)] = ""

        clone_options = {
            "env": env,  # needed before we can fix it in __init__
            "no_single_branch": True,
        }
        try:
            tmp_repo = git.Repo.clone_from(url, to_path, **clone_options)
            tmp_repo.close()
        except git.exc.GitCommandError as exc:
            raise CloneError(url, to_path, exc)

        # NOTE: using our wrapper to make sure that env is fixed in __init__
        repo = Git(to_path)
        if not rev:
            return repo

        try:
            repo.checkout(rev)
        except git.exc.GitCommandError as exc:
            raise RevError(url, rev, exc)

        return repo
Example #26
0
def _hadoop_fs(cmd, user=None):
    """Run `hadoop fs -<cmd>` in a shell and return its standard output.

    Args:
        cmd: text appended to `hadoop fs -`.
        user: when truthy, the command is prefixed with
            `HADOOP_USER_NAME=<user> `.

    Returns:
        The standard output of the command decoded as UTF-8.

    Raises:
        RemoteCmdError: if the command exits with a non-zero code.
    """
    cmd = "hadoop fs -" + cmd
    if user:
        cmd = f"HADOOP_USER_NAME={user} {cmd}"

    on_windows = os.name == "nt"

    proc = subprocess.Popen(
        cmd,
        shell=True,
        # NOTE: close_fds doesn't work with redirected stdin/stdout/stderr.
        # See https://github.com/iterative/dvc/issues/1197.
        close_fds=not on_windows,
        executable=None if on_windows else os.getenv("SHELL"),
        env=fix_env(os.environ),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        raise RemoteCmdError("hdfs", cmd, proc.returncode, stderr)
    return stdout.decode("utf-8")
Example #27
0
    def clone(url, to_path, rev=None):
        """Clone `url` into `to_path` and return it wrapped in `Git`.

        When `rev` is truthy it is checked out in the new clone.

        Raises:
            CloneError: if the clone fails with a `GitCommandError`.
            RevError: if checking out `rev` fails with a `GitCommandError`.
        """
        import git

        clone_options = {
            "env": fix_env(None),  # needed before we can fix it in __init__
            "no_single_branch": True,
        }
        try:
            tmp_repo = git.Repo.clone_from(url, to_path, **clone_options)
            tmp_repo.close()
        except git.exc.GitCommandError as exc:
            raise CloneError(url, to_path, exc)

        # NOTE: using our wrapper to make sure that env is fixed in __init__
        repo = Git(to_path)
        if not rev:
            return repo

        try:
            repo.checkout(rev)
        except git.exc.GitCommandError as exc:
            raise RevError(url, rev, exc)

        return repo
Example #28
0
def test_fix_env_pyenv(path, orig):
    """Check the PATH returned by `fix_env` for an env with PYENV_ROOT set.

    `path` is the PATH given to `fix_env` and `orig` is the PATH expected
    in its result.
    """
    fixed = fix_env({"PATH": path, "PYENV_ROOT": "/pyenv"})
    assert fixed["PATH"] == orig