Example #1
0
def erepo_dir(tmp_path_factory, monkeypatch):
    """Build a throwaway git + dvc repository and return its ``TmpDir``.

    The repository is initialised from ``REPO_TEMPLATE``, gets a default
    remote called "upstream" that points at its own local cache directory,
    and tracks a ``version`` file on ``master`` and on a new ``branch``.
    ``master`` is checked out again before the repo is handed back.
    """
    from dvc.repo import Repo
    from dvc.remote.config import RemoteConfig

    erepo = TmpDir(fspath_py35(tmp_path_factory.mktemp("erepo")))

    # git and dvc act on the current directory, so step into the new repo
    monkeypatch.chdir(fspath_py35(erepo))

    _git_init()
    erepo.dvc = Repo.init()
    erepo.scm = erepo.dvc.scm
    erepo.dvc_gen(REPO_TEMPLATE, commit="init repo")

    # Register the repo's own local cache dir as the default remote
    remote_config = RemoteConfig(erepo.dvc.config)
    remote_config.add(
        "upstream", erepo.dvc.cache.local.cache_dir, default=True
    )
    erepo.scm_add([erepo.dvc.config.config_file], commit="add remote")

    tracked = [".gitignore", "version.dvc"]

    # "version" as seen on master
    erepo.dvc_gen("version", "master")
    erepo.scm_add(list(tracked), commit="master")

    # "version" as seen on the new branch
    erepo.scm.checkout("branch", create_new=True)
    # The workspace file is dropped before being regenerated; the original
    # note only says "For mac ???", so the exact reason is undocumented.
    (erepo / "version").unlink()
    erepo.dvc_gen("version", "branch")
    erepo.scm_add(list(tracked), commit="branch")

    erepo.scm.checkout("master")
    erepo.dvc.close()
    monkeypatch.undo()  # restores the previous working directory

    return erepo
Example #2
0
    def upload(self, from_infos, to_infos, names=None, no_progress_bar=False):
        """Copy each local source in ``from_infos`` to its ``to_infos`` peer.

        Each file is copied to the path returned by ``tmp_fname(to_info)``
        and then renamed onto the destination. A failing copy or rename is
        logged with its traceback and the loop carries on with the next pair.

        Raises:
            NotImplementedError: if either side of a pair is not "local".
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        for src_info, dst_info, label in zip(from_infos, to_infos, names):
            if dst_info.scheme != "local" or src_info.scheme != "local":
                raise NotImplementedError

            logger.debug("Uploading '{}' to '{}'".format(src_info, dst_info))

            # Fall back to the source file name for the progress label
            label = label or src_info.name

            makedirs(fspath_py35(dst_info.parent), exist_ok=True)
            staging = tmp_fname(dst_info)

            try:
                copyfile(
                    fspath_py35(src_info),
                    staging,
                    name=label,
                    no_progress_bar=no_progress_bar,
                )
                os.rename(staging, fspath_py35(dst_info))
            except Exception:
                failure = "failed to upload '{}' to '{}'"
                logger.exception(failure.format(src_info, dst_info))
Example #3
0
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy ``src`` to ``dest``, showing a Tqdm progress bar if needed.

    A reflink is tried first. Only when that raises ``DvcException`` is the
    file streamed in ``LOCAL_CHUNK_SIZE`` reads, with the bar updated after
    every chunk. If ``dest`` is a directory, the file is placed inside it
    under the base name of ``src``.
    """
    from dvc.exceptions import DvcException
    from dvc.progress import Tqdm
    from dvc.system import System

    src = fspath_py35(src)
    dest = fspath_py35(dest)

    # The label is derived from `dest` as passed in, before any directory
    # adjustment below.
    if not name:
        name = os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        dest = os.path.join(dest, os.path.basename(src))

    try:
        System.reflink(src, dest)
    except DvcException:
        bar = Tqdm(desc=name, disable=no_progress_bar, total=total, bytes=True)
        with bar as pbar:
            with open(src, "rb") as fsrc, open(dest, "wb+") as fdest:
                # read() returns b"" at end of file, which ends the loop
                for buf in iter(lambda: fsrc.read(LOCAL_CHUNK_SIZE), b""):
                    fdest.write(buf)
                    pbar.update(len(buf))
Example #4
0
File: oss.py Project: samlex20/dvc
    def download(
        self,
        from_infos,
        to_infos,
        names=None,
        no_progress_bar=False,
        resume=False,
    ):
        """Fetch each remote object in ``from_infos`` to a local ``to_infos``.

        Objects are fetched into the path given by ``tmp_fname(to_info)`` and
        moved onto the destination only on success. A failed fetch is logged
        as a warning. Unless ``no_progress_bar`` is set, the progress target
        is finished in either case. ``resume`` is accepted but not used here.

        Raises:
            NotImplementedError: if a source does not use this remote's
                scheme or a destination is not "local".
        """
        names = self._verify_path_args(from_infos, to_infos, names)
        for dst_info, src_info, label in zip(to_infos, from_infos, names):
            if src_info.scheme != self.scheme:
                raise NotImplementedError
            if dst_info.scheme != "local":
                raise NotImplementedError

            logger.debug("Downloading '{}' to '{}'".format(src_info, dst_info))

            staging = tmp_fname(dst_info)
            label = label or dst_info.name

            if no_progress_bar:
                cb = None
            else:
                cb = Callback(label)

            makedirs(fspath_py35(dst_info.parent), exist_ok=True)
            try:
                self.oss_service.get_object_to_file(
                    src_info.path, staging, progress_callback=cb
                )
            except Exception:
                logger.warning("failed to download '{}'".format(src_info))
            else:
                move(staging, fspath_py35(dst_info))
            finally:
                if not no_progress_bar:
                    progress.finish_target(label)
Example #5
0
def relpath(path, start=os.curdir):
    """Return ``path`` relative to ``start``.

    On Windows, when the absolute forms of ``path`` and ``start`` share no
    common prefix, ``path`` is returned as is (after ``fspath_py35``).
    """
    path = fspath_py35(path)
    start = fspath_py35(os.path.abspath(start))

    if os.name == "nt":
        shared = os.path.commonprefix([start, os.path.abspath(path)])
        if not shared:
            # A Windows path on a different drive than `start` has no
            # relative form.
            return path

    return os.path.relpath(path, start)
Example #6
0
File: base.py Project: vibhor98/dvc
    def download(
        self,
        from_infos,
        to_infos,
        no_progress_bar=False,
        names=None,
        resume=False,
    ):
        """Download every ``from_infos`` entry to its ``to_infos`` peer.

        Pairs whose destination uses this remote's own non-local scheme are
        handed to ``self.copy``. Everything else must target "local": the
        data is fetched with ``self._download`` into the path returned by
        ``tmp_fname(to_info)`` and then moved into place. A failed fetch is
        logged with its traceback and skipped.

        Raises:
            RemoteActionNotImplemented: if the remote has no ``_download``.
            NotImplementedError: on an unsupported source or destination
                scheme.
        """
        if not hasattr(self, "_download"):
            raise RemoteActionNotImplemented("download", self.scheme)

        names = self._verify_path_args(from_infos, to_infos, names)

        with self.transfer_context() as ctx:
            for dst_info, src_info, label in zip(to_infos, from_infos, names):
                if src_info.scheme != self.scheme:
                    raise NotImplementedError

                same_scheme = dst_info.scheme == self.scheme
                if same_scheme and self.scheme != "local":
                    self.copy(src_info, dst_info, ctx=ctx)
                    continue

                if dst_info.scheme != "local":
                    raise NotImplementedError

                logger.debug(
                    "Downloading '{}' to '{}'".format(src_info, dst_info)
                )

                staging = tmp_fname(dst_info)
                label = label or dst_info.name

                if not no_progress_bar:
                    # Real progress is not always available, so at least
                    # show the start and the finish.
                    progress.update_target(label, 0, None)

                makedirs(fspath_py35(dst_info.parent), exist_ok=True)

                try:
                    self._download(
                        src_info,
                        staging,
                        name=label,
                        ctx=ctx,
                        resume=resume,
                        no_progress_bar=no_progress_bar,
                    )
                except Exception:
                    failure = "failed to download '{}' to '{}'"
                    logger.exception(failure.format(src_info, dst_info))
                    continue

                move(staging, fspath_py35(dst_info))

                if not no_progress_bar:
                    progress.finish_target(label)
Example #7
0
    def _upload(
        self, from_file, to_info, name=None, no_progress_bar=False, **_kwargs
    ):
        """Copy ``from_file`` to ``to_info`` through a temporary file.

        The parent directory of the destination is created if missing, the
        data is copied to the path given by ``tmp_fname(to_info)``, and that
        file is then renamed onto the destination. Extra keyword arguments
        are ignored.
        """
        parent_dir = fspath_py35(to_info.parent)
        makedirs(parent_dir, exist_ok=True)

        staging = tmp_fname(to_info)
        copyfile(
            from_file,
            staging,
            no_progress_bar=no_progress_bar,
            name=name,
        )
        os.rename(staging, fspath_py35(to_info))
Example #8
0
    def list_cache_paths(self):
        """Return the entries found one level below each cache subdirectory.

        Every directory directly under ``self.path_info`` is listed and each
        of its entries is returned as ``subdir / entry``. Non-directories at
        the top level are skipped.
        """
        assert self.path_info is not None

        root = self.path_info
        subdirs = (root / entry for entry in os.listdir(fspath_py35(root)))
        return [
            subdir / cache
            for subdir in subdirs
            if os.path.isdir(fspath_py35(subdir))
            for cache in os.listdir(fspath_py35(subdir))
        ]
Example #9
0
File: s3.py Project: samlex20/dvc
    def download(
        self,
        from_infos,
        to_infos,
        no_progress_bar=False,
        names=None,
        resume=False,
    ):
        """Download S3 objects in ``from_infos`` to their ``to_infos`` peers.

        An "s3" destination is served by ``self.copy``. A "local" destination
        is fetched into the path given by ``tmp_fname(to_info)`` and moved
        into place afterwards. A failed fetch is logged with its traceback
        and skipped. ``resume`` is accepted but not used here.

        Raises:
            NotImplementedError: if a source is not "s3" or a destination is
                neither "s3" nor "local".
        """
        names = self._verify_path_args(from_infos, to_infos, names)

        s3 = self.s3

        for dst_info, src_info, label in zip(to_infos, from_infos, names):
            if src_info.scheme != "s3":
                raise NotImplementedError

            if dst_info.scheme == "s3":
                self.copy(src_info, dst_info, s3=s3)
                continue

            if dst_info.scheme != "local":
                raise NotImplementedError

            logger.debug("Downloading '{}' to '{}'".format(src_info, dst_info))

            staging = tmp_fname(dst_info)
            label = label or dst_info.name

            makedirs(fspath_py35(dst_info.parent), exist_ok=True)
            try:
                cb = None
                if not no_progress_bar:
                    # The object size is needed as the progress total
                    head = s3.head_object(
                        Bucket=src_info.bucket, Key=src_info.path
                    )
                    cb = Callback(label, head["ContentLength"])

                s3.download_file(
                    src_info.bucket, src_info.path, staging, Callback=cb
                )
            except Exception:
                logger.exception("failed to download '{}'".format(src_info))
                continue

            move(staging, fspath_py35(dst_info))

            if not no_progress_bar:
                progress.finish_target(label)
Example #10
0
    def download(
        self,
        from_infos,
        to_infos,
        no_progress_bar=False,
        names=None,
        resume=False,
    ):
        """Download GS blobs in ``from_infos`` to their ``to_infos`` peers.

        A "gs" destination is served by ``self.copy``. A "local" destination
        is fetched into the path given by ``tmp_fname(to_info)`` and moved
        into place afterwards. A failed fetch is logged with its traceback
        and skipped. ``resume`` is accepted but not used here.

        Raises:
            NotImplementedError: if a source is not "gs" or a destination is
                neither "gs" nor "local".
        """
        names = self._verify_path_args(from_infos, to_infos, names)

        gs = self.gs

        for dst_info, src_info, label in zip(to_infos, from_infos, names):
            if src_info.scheme != "gs":
                raise NotImplementedError

            if dst_info.scheme == "gs":
                self.copy(src_info, dst_info, gs=gs)
                continue

            if dst_info.scheme != "local":
                raise NotImplementedError

            logger.debug("Downloading '{}' to '{}'".format(src_info, dst_info))

            staging = tmp_fname(dst_info)
            label = label or dst_info.name

            if not no_progress_bar:
                # No per-chunk callback is used with download_to_filename,
                # so progress is only reported at the start and the finish.
                progress.update_target(label, 0, None)

            makedirs(fspath_py35(dst_info.parent), exist_ok=True)
            try:
                blob = gs.bucket(src_info.bucket).get_blob(src_info.path)
                blob.download_to_filename(staging)
            except Exception:
                failure = "failed to download '{}' to '{}'"
                logger.exception(failure.format(src_info, dst_info))
                continue

            move(staging, fspath_py35(dst_info))

            if not no_progress_bar:
                progress.finish_target(label)
Example #11
0
    def get(self, path_info):
        """Look up the stored checksum for a local path.

        The record is found by the path's inode. It is only trusted when the
        current mtime and size still match what was stored, in which case the
        record's timestamp is refreshed.

        Args:
            path_info: path info to look up; its scheme must be "local".

        Returns:
            str or None: the stored checksum, or None if the path does not
            exist, has no record, or its metadata has changed.
        """
        assert path_info.scheme == "local"
        path = fspath_py35(path_info)

        if not os.path.exists(path):
            return None

        current_mtime, current_size = get_mtime_and_size(
            path, self.repo.dvcignore
        )
        inode = get_inode(path)

        record = self.get_state_record_for_inode(inode)
        if not record:
            return None

        stored_mtime, stored_size, checksum, _ = record
        changed = self._file_metadata_changed(
            current_mtime, stored_mtime, current_size, stored_size
        )
        if changed:
            return None

        self._update_state_record_timestamp_for_inode(inode)
        return checksum
Example #12
0
File: state.py Project: vasinkd/dvc
    def save_link(self, path_info):
        """Record a path in the list of links created by dvc.

        That list is used later on `dvc checkout` to clean up old links.
        Nothing is recorded when the path does not exist.

        Args:
            path_info: path info of the link; its scheme must be "local".
        """
        assert path_info.scheme == "local"
        path = fspath_py35(path_info)

        if not os.path.exists(path):
            return

        link_mtime, _ = get_mtime_and_size(path)
        link_inode = get_inode(path)
        rel = relpath(path, self.root_dir)

        # NOTE(review): the values are spliced into the SQL text, so a path
        # containing a double quote would break the statement. Bound
        # parameters would avoid that, but require `_execute` to accept
        # them - confirm before changing.
        template = (
            "REPLACE INTO {}(path, inode, mtime) "
            'VALUES ("{}", {}, "{}")'
        )
        cmd = template.format(
            self.LINK_STATE_TABLE,
            rel,
            self._to_sqlite(link_inode),
            link_mtime,
        )
        self._execute(cmd)
Example #13
0
    def open(self, path, remote=None, mode="r", encoding=None):
        """Open the output tracked at ``path`` and return the file object.

        The local cache is tried first, then the remote's own ``open``. If
        the remote does not implement it, the data is pulled into the local
        cache and opened from there.

        Raises:
            ValueError: if the output is a directory.
            OutputFileMissingError: if the cache file is still missing after
                the pull.
        """
        [out] = self.find_outs_by_path(path)
        if out.isdir():
            raise ValueError("Can't open a dir")

        cache_path = fspath_py35(
            self.cache.local.checksum_to_path_info(out.checksum)
        )

        if os.path.exists(cache_path):
            return _open(cache_path, mode=mode, encoding=encoding)

        try:
            remote_obj = self.cloud.get_remote(remote)
            remote_info = remote_obj.checksum_to_path_info(out.checksum)
            return remote_obj.open(remote_info, mode=mode, encoding=encoding)
        except RemoteActionNotImplemented:
            with self.state:
                used_cache = out.get_used_cache(remote=remote)
                self.cloud.pull(used_cache, remote=remote)

            # pull may skip a file with only a warning, so verify the result
            if not os.path.exists(cache_path):
                raise OutputFileMissingError(relpath(path, self.root_dir))

            return _open(cache_path, mode=mode, encoding=encoding)
Example #14
0
    def save(self, path_info, checksum):
        """Store ``checksum`` for a local path, keyed by the path's inode.

        A new record is inserted when the inode has none yet; otherwise the
        existing record is updated with the current mtime, size and checksum.

        Args:
            path_info: path info to save the checksum for; its scheme must be
                "local" and the path must exist.
            checksum (str): checksum to save; must not be None.
        """
        assert path_info.scheme == "local"
        assert checksum is not None

        path = fspath_py35(path_info)
        assert os.path.exists(path)

        mtime, size = get_mtime_and_size(path, self.repo.dvcignore)
        inode = get_inode(path)

        if self.get_state_record_for_inode(inode):
            self._update_state_for_path_changed(inode, mtime, size, checksum)
        else:
            self._insert_new_state_record(inode, mtime, size, checksum)
Example #15
0
File: s3.py Project: samlex20/dvc
    def upload(self, from_infos, to_infos, names=None, no_progress_bar=False):
        """Upload each local file in ``from_infos`` to its S3 ``to_infos``.

        A failed upload is logged with its traceback and skipped, so the
        remaining pairs are still processed.

        Args:
            from_infos (list): local path infos to upload.
            to_infos (list): S3 path infos, one per source.
            names (list): optional progress labels; a missing label falls
                back to the source file name.
            no_progress_bar (bool): when True, no progress callback is
                passed to S3 and no progress target is finished.

        Raises:
            NotImplementedError: if a destination is not "s3" or a source is
                not "local".
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        s3 = self.s3

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info.scheme != "s3":
                raise NotImplementedError

            if from_info.scheme != "local":
                raise NotImplementedError

            logger.debug("Uploading '{}' to '{}'".format(from_info, to_info))

            if not name:
                name = from_info.name

            total = os.path.getsize(fspath_py35(from_info))
            cb = None if no_progress_bar else Callback(name, total)

            try:
                s3.upload_file(
                    from_info.fspath,
                    to_info.bucket,
                    to_info.path,
                    Callback=cb,
                    ExtraArgs=self.extra_args,
                )
            except Exception:
                msg = "failed to upload '{}'".format(from_info)
                logger.exception(msg)
                continue

            # Only touch the progress display when the caller asked for one;
            # the download counterparts guard this call the same way.
            if not no_progress_bar:
                progress.finish_target(name)
Example #16
0
 def _download(
     self, from_info, to_file, name=None, no_progress_bar=False, **_kwargs
 ):
     """Copy the file at ``from_info`` to ``to_file`` using ``copyfile``.

     Extra keyword arguments are accepted and ignored.
     """
     source = fspath_py35(from_info)
     copyfile(source, to_file, name=name, no_progress_bar=no_progress_bar)
Example #17
0
    def download(self, from_info, to_info, name=None, no_progress_bar=False):
        """Download one file and return the number of failures (0 or 1).

        A destination on this remote's own non-local scheme is served by
        ``self.copy``. Otherwise the destination must be "local": the data is
        fetched with ``self._download`` into the path given by
        ``tmp_fname(to_info)`` and then moved into place.

        Raises:
            RemoteActionNotImplemented: if the remote has no ``_download``.
            NotImplementedError: on an unsupported source or destination
                scheme.
        """
        if not hasattr(self, "_download"):
            raise RemoteActionNotImplemented("download", self.scheme)

        if from_info.scheme != self.scheme:
            raise NotImplementedError

        same_scheme = to_info.scheme == self.scheme
        if same_scheme and self.scheme != "local":
            self.copy(from_info, to_info)
            return 0

        if to_info.scheme != "local":
            raise NotImplementedError

        logger.debug("Downloading '{}' to '{}'".format(from_info, to_info))

        if not name:
            name = to_info.name

        if not no_progress_bar:
            # Real progress is not always available, so at least show the
            # start and the finish.
            progress.update_target(name, 0, None)

        makedirs(fspath_py35(to_info.parent), exist_ok=True)
        staging = tmp_fname(to_info)

        try:
            self._download(
                from_info,
                staging,
                name=name,
                no_progress_bar=no_progress_bar,
            )
        except Exception:
            failure = "failed to download '{}' to '{}'"
            logger.exception(failure.format(from_info, to_info))
            return 1  # one failed download

        move(staging, fspath_py35(to_info))

        if not no_progress_bar:
            progress.finish_target(name)

        return 0
Example #18
0
def makedirs(path, exist_ok=False, mode=None):
    """Create ``path`` and any missing parents, optionally with ``mode``.

    Args:
        path: directory to create; converted with ``fspath_py35``.
        exist_ok: forwarded to ``_makedirs``.
        mode: permission bits wanted on the created directories. With
            ``None`` the call is forwarded to ``_makedirs`` without touching
            the process umask.
    """
    path = fspath_py35(path)

    if mode is None:
        _makedirs(path, exist_ok=exist_ok)
        return

    # Since Python 3.7 the `mode` argument of `os.makedirs` no longer affects
    # newly-created intermediate directories. With the umask cleared to 0
    # they would end up with the permissive default instead of `mode`.
    # Masking out every permission bit that `mode` does not grant makes the
    # intermediate directories match, while the leaf still gets `mode`.
    umask = os.umask(0o777 & ~mode)
    try:
        _makedirs(path, exist_ok=exist_ok, mode=mode)
    finally:
        # Always restore the caller's umask, even if creation failed
        os.umask(umask)
Example #19
0
File: http.py Project: vasinkd/dvc
    def download(
        self,
        from_infos,
        to_infos,
        names=None,
        no_progress_bar=False,
        resume=False,
    ):
        """Download each URL in ``from_infos`` and return the failure count.

        Data is written straight to ``to_info.fspath`` by
        ``self._download_to``, which also receives the ``resume`` flag. A
        failed download is logged with its traceback, counted and skipped.

        Raises:
            NotImplementedError: if a source does not use this remote's
                scheme or a destination is not "local".
        """
        names = self._verify_path_args(to_infos, from_infos, names)
        failed = 0

        for dst_info, src_info, label in zip(to_infos, from_infos, names):
            if src_info.scheme != self.scheme:
                raise NotImplementedError

            if dst_info.scheme != "local":
                raise NotImplementedError

            logger.debug("Downloading '{}' to '{}'".format(src_info, dst_info))

            label = label or dst_info.name

            makedirs(fspath_py35(dst_info.parent), exist_ok=True)

            total = self._content_length(src_info.url)

            # A progress callback only makes sense with a known total
            if total and not no_progress_bar:
                cb = ProgressBarCallback(label, total)
            else:
                cb = None

            try:
                self._download_to(
                    src_info.url, dst_info.fspath, callback=cb, resume=resume
                )
            except Exception:
                failed += 1
                logger.exception("failed to download '{}'".format(src_info))
                continue

            if not no_progress_bar:
                progress.finish_target(label)

        return failed
Example #20
0
def makedirs(path, exist_ok=False, mode=None):
    """Create ``path`` and any missing parents, optionally with ``mode``.

    With ``mode=None`` the call is forwarded to ``_makedirs`` unchanged.
    Otherwise the process umask is temporarily set to ``0o777 - mode`` for
    the duration of the call and restored afterwards.
    """
    path = fspath_py35(path)

    if mode is not None:
        # Permissions are applied through the umask because, since Python
        # 3.7, the `mode` argument of `makedirs` no longer affects the file
        # permission bits of newly-created intermediate-level directories.
        previous = os.umask(0o777 - mode)
        try:
            _makedirs(path, exist_ok=exist_ok)
        finally:
            os.umask(previous)
        return

    _makedirs(path, exist_ok=exist_ok)
Example #21
0
def dvc_walk(top, dvcignore, topdown=True, onerror=None, followlinks=False):
    """
    Walk a directory tree like `os.walk`, filtering through `dvcignore`.

    When `dvcignore` is truthy it is called with each `(root, dirs, files)`
    and its two results replace the contents of `dirs` and `files` in place
    before the triple is yielded.
    """
    walker = os.walk(
        fspath_py35(top),
        topdown=topdown,
        onerror=onerror,
        followlinks=followlinks,
    )

    for root, dirs, files in walker:
        if dvcignore:
            kept_dirs, kept_files = dvcignore(root, dirs, files)
            # Slice assignment keeps the very lists os.walk handed out
            dirs[:] = kept_dirs
            files[:] = kept_files

        yield root, dirs, files
Example #22
0
def move(src, dst, mode=None):
    """Move src to dst in two stages and optionally chmod it with mode.

    src is first brought to a uniquely named temporary path next to dst and
    only then moved onto dst. If src and dst are on different filesystems,
    the physical copying of data therefore lands in the temporary file
    rather than in a half-written dst.

    If src is a symlink, the content it points to is copied and the link
    itself is removed.

    Args:
        src: path to move; converted with ``fspath_py35``.
        dst: destination path; converted with ``fspath_py35`` and made
            absolute.
        mode: if not None, permission bits applied to the temporary file
            before it is moved onto dst.
    """

    src = fspath_py35(src)
    dst = fspath_py35(dst)

    dst = os.path.abspath(dst)
    tmp = "{}.{}".format(dst, str(uuid()))

    if os.path.islink(src):
        # Copy through the link itself instead of from os.readlink(src).
        # A relative link target is relative to the directory holding the
        # link, not to the current working directory, so the raw readlink()
        # value may name the wrong file or none at all. shutil.copy follows
        # the symlink and copies the content it points to.
        shutil.copy(src, tmp)
        os.unlink(src)
    else:
        shutil.move(src, tmp)

    if mode is not None:
        os.chmod(tmp, mode)

    shutil.move(tmp, dst)
Example #23
0
    def protect(path_info):
        """Make the file at ``path_info`` read-only for user, group, others.

        If the chmod fails with EPERM or EACCES, the error is ignored as long
        as the file already carries all three read bits. Any other failure
        is re-raised.
        """
        path = fspath_py35(path_info)
        read_only = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH

        try:
            os.chmod(path, read_only)
        except OSError as exc:
            # In a shared cache scenario we might not own the cache file, so
            # a permission error is acceptable if it is already protected.
            permission_error = exc.errno in (errno.EPERM, errno.EACCES)
            if not permission_error:
                raise

            current_mode = os.stat(path).st_mode
            already_protected = (current_mode & read_only) == read_only
            if not already_protected:
                raise
Example #24
0
    def verify_metric(self):
        """Check that an existing metric output is a text file.

        Nothing is checked when the output is not a metric or its path does
        not exist.

        Raises:
            DvcException: if the path is a directory or a binary file.
        """
        if not self.metric:
            return

        path = fspath_py35(self.path_info)
        if not os.path.exists(path):
            return

        if os.path.isdir(path):
            problem = "directory '{}' cannot be used as metrics."
        elif not istextfile(path):
            problem = "binary file '{}' cannot be used as metrics."
        else:
            return

        raise DvcException(problem.format(self.path_info))
Example #25
0
    def _get_cache_type(self, path_info):
        """Return the first configured cache type, probing it once.

        Until ``cache_type_confirmed`` is set, a test file in the cache is
        linked to a uniquely named sibling of ``path_info``. Both files are
        removed afterwards whether or not the link succeeded. An error from
        the link attempt propagates and leaves the type unconfirmed.
        """
        if not self.cache_type_confirmed:
            probe_link = path_info.with_name("." + uuid())
            probe_cache = self.path_info / ".cache_type_test_file"

            if not self.exists(probe_cache):
                with open(fspath_py35(probe_cache), "wb") as fobj:
                    fobj.write(bytes(1))

            try:
                self.link(probe_cache, probe_link)
            finally:
                self.remove(probe_link)
                self.remove(probe_cache)

            self.cache_type_confirmed = True

        return self.cache_types[0]
Example #26
0
    def _parse_path(self, remote, path):
        """Turn ``path`` into an absolute, normalized ``path_cls`` instance.

        A "remote://" URL is resolved against ``remote.path_info``. Any other
        value is treated as a filesystem path and, if relative, resolved
        against the stage's working directory.
        """
        parsed = urlparse(path)
        if parsed.scheme != "remote":
            # NOTE: the path may come from the command line or from a .dvc
            # file, so both posix and windows style paths are expected.
            # PathInfo accepts both, i.e. / works everywhere, \ only on win.
            #
            # FIXME: a Windows path containing / or a posix one containing \
            # hits bug #2059 and cannot really be handled.
            p = self.REMOTE.path_cls(path)
            if not p.is_absolute():
                p = self.stage.wdir / p
        else:
            p = remote.path_info / parsed.path.lstrip("/")

        normalized = os.path.normpath(fspath_py35(p))
        return self.REMOTE.path_cls(os.path.abspath(normalized))
Example #27
0
def file_md5(fname):
    """Return ``(md5 hexdigest, md5 digest)`` of a file.

    ``(None, None)`` is returned when the file does not exist. Text files
    are hashed after passing each chunk through ``dos2unix``; binary files
    are hashed as they are. A progress bar is shown only for files of at
    least ``LARGE_FILE_SIZE`` bytes.
    """
    from dvc.progress import Tqdm
    from dvc.istextfile import istextfile

    fname = fspath_py35(fname)

    if not os.path.exists(fname):
        return (None, None)

    hash_md5 = hashlib.md5()
    binary = not istextfile(fname)
    size = os.path.getsize(fname)

    is_large = size >= LARGE_FILE_SIZE
    if is_large:
        msg = "Computing md5 for a large file '{}'. This is only done once."
        logger.info(msg.format(relpath(fname)))
    name = relpath(fname)

    bar = Tqdm(
        desc=name,
        disable=not is_large,
        total=size,
        bytes=True,
        leave=False,
    )
    with bar as pbar:
        with open(fname, "rb") as fobj:
            # read() returns b"" at end of file, which ends the loop
            for data in iter(lambda: fobj.read(LOCAL_CHUNK_SIZE), b""):
                hash_md5.update(data if binary else dos2unix(data))
                # Progress counts the bytes read, not the bytes hashed
                pbar.update(len(data))

    return (hash_md5.hexdigest(), hash_md5.digest())
Example #28
0
    def _open_cached(self, out, remote=None, mode="r", encoding=None):
        """Open the cached data of ``out`` and return the file object.

        The local cache is tried first, then the remote's own ``open``. If
        the remote does not implement it, the data is pulled into the local
        cache and opened from there.

        Raises:
            ValueError: if ``out`` is a directory.
        """
        if out.isdir():
            raise ValueError("Can't open a dir")

        cache_path = fspath_py35(
            self.cache.local.checksum_to_path_info(out.checksum)
        )

        if os.path.exists(cache_path):
            return _open(cache_path, mode=mode, encoding=encoding)

        try:
            remote_obj = self.cloud.get_remote(remote)
            remote_info = remote_obj.checksum_to_path_info(out.checksum)
            return remote_obj.open(remote_info, mode=mode, encoding=encoding)
        except RemoteActionNotImplemented:
            # The remote cannot stream, so fetch into the local cache first
            with self.state:
                used_cache = out.get_used_cache(remote=remote)
                self.cloud.pull(used_cache, remote=remote)

            return _open(cache_path, mode=mode, encoding=encoding)
Example #29
0
 def open(self, path_info, mode="r", encoding=None):
     """Open the file at ``path_info`` for reading and return it.

     Only the read modes "r", "rt" and "rb" pass the assertion.
     """
     assert mode in {"r", "rt", "rb"}
     local_path = fspath_py35(path_info)
     return open(local_path, mode=mode, encoding=encoding)
Example #30
0
 def get_file_checksum(self, path_info):
     """Return the first element of ``file_md5`` for the given path."""
     md5_info = file_md5(fspath_py35(path_info))
     return md5_info[0]