Beispiel #1
0
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy ``src`` to ``dest``, showing a Tqdm progress bar.

    A reflink via ``System.reflink`` is tried first. Only when that raises
    ``DvcException`` is the content copied in ``LOCAL_CHUNK_SIZE`` chunks.
    If ``dest`` is an existing directory, the file is placed inside it under
    the basename of ``src``.
    """
    from dvc.exceptions import DvcException
    from dvc.progress import Tqdm
    from dvc.system import System

    src = fspath_py35(src)
    dest = fspath_py35(dest)

    # The label falls back to the basename of ``dest`` as given, i.e. before
    # a directory destination is expanded below.
    if not name:
        name = os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        # Reflink failed: fall back to a plain chunked byte copy.
        with Tqdm(
            desc=name, disable=no_progress_bar, total=total, bytes=True
        ) as pbar:
            with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
                # read() returns b"" at end of file, which stops iteration.
                for buf in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
                    fdest.write(buf)
                    pbar.update(len(buf))
Beispiel #2
0
    def ignore(self, path):
        """Append the gitignore entry for ``path`` unless it is already there.

        ``self._get_gitignore(path)`` supplies the entry text and the path of
        the gitignore file to edit. Existing lines are compared to the entry
        after stripping surrounding whitespace. When ``self.repo`` is set,
        the relative gitignore path is queued in ``repo.files_to_git_add``.

        Args:
            path (str): path that should be ignored.
        """
        entry, gitignore = self._get_gitignore(path)

        ignore_list = []
        if os.path.exists(gitignore):
            # Read through a context manager so the handle is closed right
            # away instead of whenever the file object gets collected.
            with open(gitignore, "r") as fobj:
                ignore_list = fobj.readlines()
            wanted = entry.strip()
            if any(line.strip() == wanted for line in ignore_list):
                return

        msg = "Adding '{}' to '{}'.".format(
            os.path.relpath(path), os.path.relpath(gitignore)
        )
        logger.info(msg)

        # Entries are written without a trailing newline, so a separator is
        # needed whenever the file already has content.
        content = entry
        if ignore_list:
            content = "\n" + content

        with open(gitignore, "a") as fobj:
            fobj.write(content)

        if self.repo is not None:
            self.repo.files_to_git_add.append(os.path.relpath(gitignore))
Beispiel #3
0
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy ``src`` to ``dest``, reporting progress via ``dvc.progress``.

    ``System.reflink`` is tried first. When it raises ``DvcException`` the
    content is copied in ``LOCAL_CHUNK_SIZE`` chunks instead. If ``dest`` is
    an existing directory, the file lands inside it under the basename of
    ``src``.
    """
    from dvc.exceptions import DvcException
    from dvc.progress import progress
    from dvc.system import System

    show_progress = not no_progress_bar

    # The default label uses ``dest`` as passed, before directory expansion.
    if not name:
        name = os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        copied = 0
        with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
            # read() returns b"" at end of file, which stops iteration.
            for buf in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
                fdest.write(buf)
                copied += len(buf)
                if show_progress:
                    progress.update_target(name, copied, total)

    if show_progress:
        progress.finish_target(name)
Beispiel #4
0
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy ``src`` to ``dest`` in chunks, reporting progress.

    Args:
        src (str): path of the file to copy.
        dest (str): target path; if it is an existing directory the file is
            written inside it under the basename of ``src``.
        no_progress_bar (bool): when true, no progress is reported.
        name (str): progress label; defaults to the basename of ``dest`` as
            it was passed in.
    """
    from dvc.progress import progress

    copied = 0
    name = name if name else os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    # Context managers close both handles even when a read, write or
    # progress update raises part-way through the copy.
    with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
        while True:
            buf = fsrc.read(LOCAL_CHUNK_SIZE)
            if not buf:
                break
            fdest.write(buf)
            copied += len(buf)
            if not no_progress_bar:
                progress.update_target(name, copied, total)

    if not no_progress_bar:
        progress.finish_target(name)
Beispiel #5
0
    def init(dvc_dir):
        """Create an empty config file in ``dvc_dir`` and load it.

        Args:
            dvc_dir (str): path to .dvc directory.

        Returns:
            dvc.config.Config: config object.
        """
        config_path = os.path.join(dvc_dir, Config.CONFIG)
        # "w+" creates the file, or empties it when it already exists.
        with open(config_path, "w+"):
            pass
        return Config(dvc_dir)
Beispiel #6
0
    def _reflink_linux(src, dst):
        import os
        import fcntl

        FICLONE = 0x40049409

        try:
            ret = 255
            with open(src, "r") as s, open(dst, "w+") as d:
                ret = fcntl.ioctl(d.fileno(), FICLONE, s.fileno())
        finally:
            if ret != 0:
                os.unlink(dst)

        return ret
Beispiel #7
0
    def _install_hook(self, name, cmd):
        command = "dvc {}".format(cmd)

        hook = os.path.join(self.root_dir, self.GIT_DIR, "hooks", name)

        if os.path.isfile(hook):
            with open(hook, "r+") as fobj:
                if command not in fobj.read():
                    fobj.write("exec {command}\n".format(command=command))
        else:
            with open(hook, "w+") as fobj:
                fobj.write("#!/bin/sh\n"
                           "exec {command}\n".format(command=command))

        os.chmod(hook, 0o777)
Beispiel #8
0
    def ignore_remove(self, path):
        """Remove the gitignore entry for ``path`` from its gitignore file.

        ``self._get_gitignore(path)`` provides the entry and the gitignore
        path. Nothing happens when that file does not exist. Otherwise every
        line equal to the entry (after stripping whitespace) is dropped, the
        file is rewritten and passed to ``self.track_file``.
        """
        entry, gitignore = self._get_gitignore(path)

        if not os.path.exists(gitignore):
            return

        with open(gitignore, "r") as fobj:
            current = fobj.readlines()

        kept = [line for line in current if line.strip() != entry.strip()]

        with open(gitignore, "w") as fobj:
            fobj.writelines(kept)

        self.track_file(relpath(gitignore))
Beispiel #9
0
    def ignore(self, path, in_curr_dir=False):
        """Add the gitignore entry for ``path`` unless it is already listed.

        The base directory handed to ``self._get_gitignore`` is the resolved
        current directory when ``in_curr_dir`` is true, otherwise the
        directory part of ``path``. After the entry is added, the gitignore
        file is passed to ``self.track_file`` and ``path`` is recorded in
        ``self.ignored_paths``.
        """
        if in_curr_dir:
            base_dir = os.path.realpath(os.curdir)
        else:
            base_dir = os.path.dirname(path)
        entry, gitignore = self._get_gitignore(path, base_dir)

        ignore_list = []
        if os.path.exists(gitignore):
            with open(gitignore, "r") as f:
                ignore_list = f.readlines()
            # Already ignored: nothing to write, track or record.
            for line in ignore_list:
                if line.strip() == entry.strip():
                    return

        logger.info(
            "Adding '{}' to '{}'.".format(
                os.path.relpath(path), os.path.relpath(gitignore)
            )
        )

        self._add_entry_to_gitignore(entry, gitignore, ignore_list)

        self.track_file(os.path.relpath(gitignore))

        self.ignored_paths.append(path)
Beispiel #10
0
    def load(repo, fname):
        """Build a ``Stage`` from the stage file ``fname``.

        The file is checked for existence, filename and stage-file status,
        then parsed with ``yaml.safe_load`` (an empty document is treated as
        ``{}``) and validated. The working directory is resolved relative to
        the directory of the stage file.

        Raises:
            StageFileIsNotDvcFileError: if ``Stage.is_stage_file(fname)`` is
                false.
        """
        Stage._check_file_exists(fname)
        Stage._check_dvc_filename(fname)

        if not Stage.is_stage_file(fname):
            raise StageFileIsNotDvcFileError(fname)

        with open(fname, "r") as fd:
            data = yaml.safe_load(fd) or {}

        Stage.validate(data, fname=os.path.relpath(fname))

        path = os.path.abspath(fname)
        # The stored wdir (default ".") is relative to the stage file.
        wdir = os.path.abspath(
            os.path.join(os.path.dirname(path), data.get(Stage.PARAM_WDIR, "."))
        )

        stage = Stage(
            repo=repo,
            path=path,
            wdir=wdir,
            cmd=data.get(Stage.PARAM_CMD),
            md5=data.get(Stage.PARAM_MD5),
            locked=data.get(Stage.PARAM_LOCKED, False),
        )

        stage.deps = dependency.loadd_from(
            stage, data.get(Stage.PARAM_DEPS, [])
        )
        stage.outs = output.loadd_from(stage, data.get(Stage.PARAM_OUTS, []))

        return stage
Beispiel #11
0
    def ignore_remove(self, path):
        """Remove the gitignore entry for ``path`` from its gitignore file.

        Does nothing when the gitignore file is missing. Otherwise the file
        is rewritten without the lines equal to the entry (after stripping
        whitespace). When ``self.repo`` is set, the relative gitignore path
        is queued in ``repo.files_to_git_add``.
        """
        entry, gitignore = self._get_gitignore(path)

        if not os.path.exists(gitignore):
            return

        with open(gitignore, "r") as fobj:
            kept = [line for line in fobj if line.strip() != entry.strip()]

        with open(gitignore, "w") as fobj:
            fobj.writelines(kept)

        repo = self.repo
        if repo is not None:
            repo.files_to_git_add.append(os.path.relpath(gitignore))
Beispiel #12
0
 def _ignored(entry, gitignore_path):
     if os.path.exists(gitignore_path):
         with open(gitignore_path, "r") as fobj:
             ignore_list = fobj.readlines()
         return any(
             filter(lambda x: x.strip() == entry.strip(), ignore_list))
     return False
Beispiel #13
0
    def _install_hook(self, name, cmd):
        command = ('[ "$3" = "0" ]'
                   ' || [ -z "$(git ls-files .dvc)" ]'
                   " || exec dvc {}".format(cmd))

        hook = self._hook_path(name)

        if os.path.isfile(hook):
            with open(hook, "r+") as fobj:
                if command not in fobj.read():
                    fobj.write("{command}\n".format(command=command))
        else:
            with open(hook, "w+") as fobj:
                fobj.write("#!/bin/sh\n" "{command}\n".format(command=command))

        os.chmod(hook, 0o777)
Beispiel #14
0
 def _install_hook(self, name, cmd):
     hook = os.path.join(self.root_dir, self.GIT_DIR, "hooks", name)
     if os.path.isfile(hook):
         msg = "git hook '{}' already exists."
         raise SCMError(msg.format(os.path.relpath(hook)))
     with open(hook, "w+") as fobj:
         fobj.write("#!/bin/sh\nexec dvc {}\n".format(cmd))
     os.chmod(hook, 0o777)
Beispiel #15
0
def load_stage_file(path):
    """Parse the YAML stage file at ``path``.

    Returns the parsed document, or ``{}`` when the parser yields a falsy
    value (for example an empty file).

    Raises:
        StageFileCorruptedError: if parsing raises ``ScannerError``.
    """
    from dvc.exceptions import StageFileCorruptedError

    with open(path, "r") as fobj:
        try:
            parsed = yaml.safe_load(fobj)
        except ScannerError:
            raise StageFileCorruptedError(path)

    return parsed or {}
Beispiel #16
0
Datei: ignore.py Projekt: yk/dvc
    def __init__(self, ignore_file_path):
        """Load the ignore patterns stored in ``ignore_file_path``.

        The path must be absolute. Its normalized parent directory is kept
        in ``self.dirname``, and the file's lines are compiled into
        ``self.ignore_spec`` as ``GitWildMatchPattern`` patterns.
        """
        assert os.path.isabs(ignore_file_path)

        parent = os.path.dirname(ignore_file_path)
        self.ignore_file_path = ignore_file_path
        self.dirname = os.path.normpath(parent)

        with open(ignore_file_path, encoding="utf-8") as fobj:
            spec = PathSpec.from_lines(GitWildMatchPattern, fobj)
            self.ignore_spec = spec
Beispiel #17
0
    def create(self, name, contents):
        """Append ``contents`` to the file ``name``, creating it if needed.

        Missing parent directories are created first. ``contents`` may be a
        ``str``; anything else is decoded as UTF-8 before writing.
        """
        parent = os.path.dirname(name)
        if parent and not os.path.isdir(parent):
            os.makedirs(parent)

        with open(name, "a", encoding="utf-8") as f:
            if isinstance(contents, str):
                text = contents
            else:
                text = contents.decode("utf-8")
            f.write(text)
Beispiel #18
0
def _read_metrics_filesystem(path, typ, xpath, rel_path, branch):
    if not os.path.exists(path):
        return None
    with open(path, "r") as fd:
        return _read_metric(fd,
                            typ=typ,
                            xpath=xpath,
                            rel_path=rel_path,
                            branch=branch)
Beispiel #19
0
    def _link(self, from_info, to_info, link_types):
        from_path = from_info.fspath
        to_path = to_info.fspath

        assert os.path.isfile(from_path)

        dname = os.path.dirname(to_path)
        if not os.path.exists(dname):
            os.makedirs(dname)

        # NOTE: just create an empty file for an empty cache
        if os.path.getsize(from_path) == 0:
            open(to_path, "w+").close()

            msg = "Created empty file: {} -> {}".format(from_path, to_path)
            logger.debug(msg)
            return

        self._try_links(from_info, to_info, link_types)
Beispiel #20
0
 def _add_entry_to_gitignore(entry, gitignore):
     with open(gitignore, "a+", encoding="utf-8") as fobj:
         fobj.seek(0, os.SEEK_END)
         if fobj.tell() == 0:
             # Empty file
             prefix = ""
         else:
             fobj.seek(fobj.tell() - 1, os.SEEK_SET)
             last = fobj.read(1)
             prefix = "" if last == "\n" else "\n"
         fobj.write("{}{}\n".format(prefix, entry))
Beispiel #21
0
    def _get_latest_version(self):
        """Fetch version info from ``self.URL`` into ``self.updater_file``.

        The response body is parsed as JSON and written back as JSON. A
        ``requests`` failure is logged at debug level and nothing is
        written.
        """
        import json

        try:
            response = requests.get(self.URL, timeout=self.TIMEOUT_GET)
            info = response.json()
        except requests.exceptions.RequestException as exc:
            logger.debug(
                "Failed to retrieve latest version: {}".format(exc)
            )
            return None

        with open(self.updater_file, "w+") as fobj:
            json.dump(info, fobj)
Beispiel #22
0
    def dump(self):
        """Write this object's ``dumpd()`` data as YAML to ``self.path``.

        The filename is checked with ``_check_dvc_filename`` first. After
        writing, the relative path is passed to ``self.repo.scm.track_file``.
        """
        fname = self.path

        self._check_dvc_filename(fname)

        message = "Saving information to '{file}'.".format(
            file=os.path.relpath(fname)
        )
        logger.info(message)

        # Serialize before opening so the file is only touched once the
        # data is available.
        data = self.dumpd()

        with open(fname, "w") as fd:
            yaml.safe_dump(data, fd, default_flow_style=False)

        self.repo.scm.track_file(os.path.relpath(fname))
Beispiel #23
0
 def _download(self, from_info, to_file, name=None, no_progress_bar=False):
     """Stream ``from_info.url`` into ``to_file`` with a Tqdm bar.

     The bar total comes from ``self._content_length(from_info)`` unless the
     bar is disabled. Its label is ``name``, or the URL when ``name`` is
     ``None``.
     """
     response = self._request("GET", from_info.url, stream=True)

     if no_progress_bar:
         total = None
     else:
         total = self._content_length(from_info)
     label = from_info.url if name is None else name

     with Tqdm(
         total=total,
         leave=False,
         bytes=True,
         desc_truncate=label,
         disable=no_progress_bar,
     ) as pbar:
         with open(to_file, "wb") as fd:
             for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                 fd.write(chunk)
                 # Flush after every chunk so the data leaves the buffer.
                 fd.flush()
                 pbar.update(len(chunk))
Beispiel #24
0
    def dump(self, fname=None):
        """Write this object's ``dumpd()`` data as YAML to a file.

        Args:
            fname (str): target path; defaults to ``self.path``.

        The filename is checked with ``_check_dvc_filename`` first. After
        writing, the relative path is appended to
        ``self.repo.files_to_git_add``.
        """
        fname = fname or self.path

        self._check_dvc_filename(fname)

        logger.info(
            "Saving information to '{file}'.".format(
                file=os.path.relpath(fname)
            )
        )

        # Build the data BEFORE opening the file: mode "w" truncates at
        # open time, so a failure inside dumpd() would otherwise leave an
        # emptied file behind.
        data = self.dumpd()

        with open(fname, "w") as fd:
            yaml.safe_dump(data, fd, default_flow_style=False)

        self.repo.files_to_git_add.append(os.path.relpath(fname))
Beispiel #25
0
    def _reflink_linux(src, dst):
        import os
        import fcntl

        FICLONE = 0x40049409

        s = open(src, "r")
        d = open(dst, "w+")

        try:
            ret = fcntl.ioctl(d.fileno(), FICLONE, s.fileno())
        except IOError:
            s.close()
            d.close()
            os.unlink(dst)
            raise

        s.close()
        d.close()

        if ret != 0:
            os.unlink(dst)

        return ret
Beispiel #26
0
    def _write_request_content(
        self, mode, partial_file, request, transferred_bytes, callback=None
    ):
        with open(partial_file, mode) as fd:

            for index, chunk in enumerate(
                request.iter_content(chunk_size=self.CHUNK_SIZE)
            ):
                chunk_number = index + 1
                if chunk_number * self.CHUNK_SIZE > transferred_bytes:
                    fd.write(chunk)
                    fd.flush()
                    transferred_bytes += len(chunk)

                if callback:
                    callback(transferred_bytes)
Beispiel #27
0
 def _download(self, from_info, to_file, name=None, no_progress_bar=False):
     """Stream ``from_info.url`` into ``to_file`` with a Tqdm bar.

     The bar total comes from ``self._content_length(response)`` unless the
     bar is disabled. Its description is ``name``, or the URL when ``name``
     is ``None``.

     Raises:
         HTTPError: if the response status code is not 200.
     """
     response = self._request("GET", from_info.url, stream=True)
     if response.status_code != 200:
         raise HTTPError(response.status_code, response.reason)

     if no_progress_bar:
         total = None
     else:
         total = self._content_length(response)
     label = from_info.url if name is None else name

     with Tqdm(
         total=total,
         leave=False,
         bytes=True,
         desc=label,
         disable=no_progress_bar,
     ) as pbar:
         with open(to_file, "wb") as fd:
             for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                 fd.write(chunk)
                 pbar.update(len(chunk))
Beispiel #28
0
    def _get_cache_type(self, path_info):
        if self.cache_type_confirmed:
            return self.cache_types[0]

        workspace_file = path_info.with_name("." + uuid())
        test_cache_file = self.path_info / ".cache_type_test_file"
        if not self.exists(test_cache_file):
            with open(fspath_py35(test_cache_file), "wb") as fobj:
                fobj.write(bytes(1))
        try:
            self.link(test_cache_file, workspace_file)
        finally:
            self.remove(workspace_file)
            self.remove(test_cache_file)

        self.cache_type_confirmed = True
        return self.cache_types[0]
Beispiel #29
0
    def _download(self, from_info, to_file, name=None, no_progress_bar=False):
        callback = None
        if not no_progress_bar:
            total = self._content_length(from_info.url)
            if total:
                callback = ProgressBarCallback(name, total)

        request = self._request("GET", from_info.url, stream=True)

        with open(to_file, "wb") as fd:
            transferred_bytes = 0

            for chunk in request.iter_content(chunk_size=self.CHUNK_SIZE):
                fd.write(chunk)
                fd.flush()
                transferred_bytes += len(chunk)

                if callback:
                    callback(transferred_bytes)
Beispiel #30
0
def file_md5(fname):
    """Return ``(md5 hexdigest, md5 digest)`` of a file.

    A path that does not exist yields ``(None, None)``. Files that
    ``istextfile`` reports as text have every chunk passed through
    ``dos2unix`` before hashing. The progress bar is shown only for files
    of at least ``LARGE_FILE_SIZE`` bytes.
    """
    from dvc.progress import Tqdm
    from dvc.istextfile import istextfile

    fname = fspath_py35(fname)

    if not os.path.exists(fname):
        return (None, None)

    hash_md5 = hashlib.md5()
    binary = not istextfile(fname)
    size = os.path.getsize(fname)

    no_progress_bar = size < LARGE_FILE_SIZE
    if not no_progress_bar:
        msg = "Computing md5 for a large file '{}'. This is only done once."
        logger.info(msg.format(relpath(fname)))
    name = relpath(fname)

    with Tqdm(
        desc=name,
        disable=no_progress_bar,
        total=size,
        bytes=True,
        leave=False,
    ) as pbar:
        with open(fname, "rb") as fobj:
            # read() returns b"" at end of file, which stops iteration.
            for data in iter(lambda: fobj.read(LOCAL_CHUNK_SIZE), b""):
                hash_md5.update(data if binary else dos2unix(data))
                # Progress counts the raw bytes read, not the converted
                # chunk.
                pbar.update(len(data))

    return (hash_md5.hexdigest(), hash_md5.digest())