Example #1
0
    def download(self,
                 from_infos,
                 to_infos,
                 no_progress_bar=False,
                 names=None):
        """Download each remote blob in ``from_infos`` to the matching
        local path in ``to_infos``, optionally reporting progress.
        """
        names = self._verify_path_args(from_infos, to_infos, names)

        for to_info, from_info, name in zip(to_infos, from_infos, names):
            # Only remote-to-local transfers are supported here.
            if from_info['scheme'] != self.scheme:
                raise NotImplementedError

            if to_info['scheme'] != 'local':
                raise NotImplementedError

            bucket = from_info['bucket']
            path = from_info['path']
            dest = to_info['path']

            logger.debug("Downloading '{}/{}' to '{}'".format(
                bucket, path, dest))

            # Download into a temp file first; rename only on success.
            tmp_file = self.tmp_file(dest)
            name = name or os.path.basename(dest)

            cb = Callback(name) if not no_progress_bar else None

            self._makedirs(dest)

            try:
                self.blob_service.get_blob_to_path(bucket,
                                                   path,
                                                   tmp_file,
                                                   progress_callback=cb)
            except Exception:
                logger.warning(
                    "failed to download '{}/{}'".format(bucket, path))
            else:
                os.rename(tmp_file, dest)

                if not no_progress_bar:
                    progress.finish_target(name)
Example #2
0
    def reproduce(self, force=False, dry=False, interactive=False):
        """Re-run this stage's command if its dependencies changed.

        Args:
            force (bool): reproduce even if nothing changed.
            dry (bool): don't actually run the command.
            interactive (bool): ask for confirmation before reproducing.

        Returns:
            Stage: self if the stage was reproduced, None if up to date.

        Raises:
            DvcException: if the user aborts at the interactive prompt.
        """
        if not self.changed() and not force:
            return None

        msg = "Going to reproduce '{stage}'. Are you sure you want to continue?".format(
            stage=self.relpath)

        # NOTE: confirm BEFORE removing outputs. Previously the outputs were
        # removed first, destroying data even if the user then aborted.
        if interactive and not prompt.confirm(msg):
            raise DvcException("reproduction aborted by the user")

        if (self.cmd or self.is_import) and not self.locked and not dry:
            # Removing outputs only if we actually have command to reproduce
            self.remove_outs(ignore_remove=False)

        logger.info("Reproducing '{stage}'".format(stage=self.relpath))

        self.run(dry=dry)

        logger.debug("'{stage}' was reproduced".format(stage=self.relpath))

        return self
Example #3
0
def daemon(args):
    """Launch a `dvc daemon` command in a detached process.

    Args:
        args (list): list of arguments to append to `dvc daemon` command.
    """
    cmd = [sys.executable]
    if not is_binary():
        cmd.extend(['-m', 'dvc'])
    cmd.extend(['daemon', '-q'])
    cmd.extend(args)

    logger.debug("Trying to spawn '{}'".format(cmd))

    # Pick the platform-specific spawner; anything but nt/posix is
    # unsupported.
    spawners = {'nt': _spawn_windows, 'posix': _spawn_posix}
    spawn = spawners.get(os.name)
    if spawn is None:
        raise NotImplementedError
    spawn(cmd)

    logger.debug("Spawned '{}'".format(cmd))
Example #4
0
    def reproduce(self,
                  force=False,
                  dry=False,
                  interactive=False,
                  no_commit=False):
        """Re-run this stage and return it, or None when it is up to date."""
        # Nothing to do when the stage hasn't changed and no force requested.
        if not self.changed() and not force:
            return None

        question = ("Going to reproduce '{stage}'. "
                    "Are you sure you want to continue?".format(
                        stage=self.relpath))

        # In interactive mode the user may abort before anything happens.
        if interactive and not prompt.confirm(question):
            raise DvcException("reproduction aborted by the user")

        logger.info("Reproducing '{stage}'".format(stage=self.relpath))

        self.run(dry=dry, no_commit=no_commit, force=force)

        logger.debug("'{stage}' was reproduced".format(stage=self.relpath))

        return self
Example #5
0
    def checkout(self, output, force=False, progress_callback=None):
        """Materialize ``output`` from the cache into its workspace path.

        Silently returns when there is no checksum recorded, when the data
        is already up to date, or (with a warning) when the cache entry is
        missing.
        """
        scheme = output.path_info["scheme"]
        if scheme != self.scheme and scheme not in ["", "local"]:
            raise NotImplementedError

        checksum = output.info.get(self.PARAM_CHECKSUM)
        if not checksum:
            logger.debug(
                "No checksum info for '{}'.".format(str(output.path_info)))
            return

        if not self.changed(output.path_info, output.info):
            logger.debug(
                "Data '{}' didn't change.".format(str(output.path_info)))
            return

        if self.changed_cache(checksum):
            # Can't check out what we don't have; remove a stale workspace
            # copy instead (respecting `force`).
            logger.warning(
                "Cache '{}' not found. File '{}' won't be created.".format(
                    checksum, str(output.path_info)))
            self.safe_remove(output.path_info, force=force)
            return

        logger.debug("Checking out '{}' with cache '{}'.".format(
            str(output.path_info), checksum))

        self.do_checkout(output,
                         force=force,
                         progress_callback=progress_callback)
Example #6
0
    def upload(self, from_infos, to_infos, names=None):
        """Upload local files to an SSH remote over one SFTP session.

        Individual transfer failures are logged and skipped. The SSH and
        SFTP connections are always closed, even when an unexpected error
        escapes the loop.
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        ssh = self.ssh(host=to_infos[0]['host'],
                       user=to_infos[0]['user'],
                       port=to_infos[0]['port'])
        sftp = ssh.open_sftp()

        # NOTE: previously the connections leaked if an exception (e.g. the
        # NotImplementedError below) was raised inside the loop.
        try:
            for from_info, to_info, name in zip(from_infos, to_infos, names):
                if to_info['scheme'] != 'ssh':
                    raise NotImplementedError

                if from_info['scheme'] != 'local':
                    raise NotImplementedError

                logger.debug("Uploading '{}' to '{}/{}'".format(
                    from_info['path'], to_info['host'], to_info['path']))

                if not name:
                    name = os.path.basename(from_info['path'])

                # Make sure the remote parent directory exists.
                dname = posixpath.dirname(to_info['path'])
                self._exec(ssh, 'mkdir -p {}'.format(dname))

                try:
                    sftp.put(from_info['path'],
                             to_info['path'],
                             callback=create_cb(name))
                except Exception:
                    msg = "failed to upload '{}' to '{}/{}'"
                    logger.error(msg.format(from_info['path'],
                                            to_info['host'],
                                            to_info['path']))
                    continue

                progress.finish_target(name)
        finally:
            sftp.close()
            ssh.close()
Example #7
0
    def _unprotect_file(self, path):
        """Replace a sym/hardlinked file at ``path`` with an independent,
        writable copy.
        """
        import stat
        import uuid
        from dvc.system import System
        from dvc.utils import copyfile, move, remove

        if System.is_symlink(path) or System.is_hardlink(path):
            logger.debug("Unprotecting '{}'".format(path))

            tmp = os.path.join(os.path.dirname(path), "." + str(uuid.uuid4()))

            # NOTE: copy first under a temporary name and then swap it in.
            # The previous order (move the file away, copy it back) left a
            # window where a concurrent reader of `path` saw a missing or
            # partially-written file.
            copyfile(path, tmp)
            remove(path)
            move(tmp, path)
        else:
            logger.debug(
                "Skipping copying for '{}', since it is not "
                "a symlink or a hardlink.".format(path)
            )

        # Make sure the file is writable by the owner.
        os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)
Example #8
0
File: local.py Project: ei-grad/dvc
    def download(
        self,
        from_infos,
        to_infos,
        no_progress_bar=False,
        names=None,
        resume=False,
    ):
        """Copy local source files to local destinations.

        Copies into a temporary file first and renames on success; on
        failure the temporary file is cleaned up and the pair is skipped.
        """
        names = self._verify_path_args(from_infos, to_infos, names)

        for to_info, from_info, name in zip(to_infos, from_infos, names):
            if from_info["scheme"] != "local":
                raise NotImplementedError

            if to_info["scheme"] != "local":
                raise NotImplementedError

            logger.debug("Downloading '{}' to '{}'".format(
                from_info["path"], to_info["path"]))

            if not name:
                name = os.path.basename(to_info["path"])

            makedirs(os.path.dirname(to_info["path"]), exist_ok=True)
            tmp_file = tmp_fname(to_info["path"])
            try:
                copyfile(
                    from_info["path"],
                    tmp_file,
                    no_progress_bar=no_progress_bar,
                    name=name,
                )

                os.rename(tmp_file, to_info["path"])
            except Exception:
                logger.error("failed to download '{}' to '{}'".format(
                    from_info["path"], to_info["path"]))

                # NOTE: don't leave a partially-written temp file behind.
                if os.path.exists(tmp_file):
                    os.unlink(tmp_file)

                continue
Example #9
0
    def test_exc_info_on_other_record_types(self, caplog, dt):
        # Verify that exc_info is rendered (stack trace framed by colored
        # rulers) even for non-ERROR records such as DEBUG.
        with caplog.at_level(logging.DEBUG, logger="dvc"):
            try:
                raise Exception("description")
            except Exception:
                # Capture the same trace the formatter is expected to emit.
                stack_trace = traceback.format_exc()
                logger.debug("", exc_info=True)

            # `colors` supplies the green/blue/red/nc escape codes; `dt` is
            # the expected timestamp prefix (test fixture).
            expected = (
                "{green}{datetime}{nc} "
                "{blue}DEBUG{nc}: description\n"
                "{red}{line}{nc}\n"
                "{stack_trace}"
                "{red}{line}{nc}".format(
                    line="-" * 60,
                    stack_trace=stack_trace,
                    datetime=dt,
                    **colors,
                )
            )

            assert expected == formatter.format(caplog.records[0])
Example #10
0
    def _compute_md5(self):
        """Return the md5 of this stage's serialized definition."""
        from dvc.output.local import OutputLOCAL

        d = self.dumpd()

        # NOTE: removing md5 manually in order to not affect md5s in deps/outs
        d.pop(self.PARAM_MD5, None)

        # Ignore the wdir default value. In this case stage file w/o
        # wdir has the same md5 as a file with the default value specified.
        # It's important for backward compatibility with pipelines that
        # didn't have WDIR in their stage files.
        if d.get(self.PARAM_WDIR) == ".":
            del d[self.PARAM_WDIR]

        # NOTE: excluding parameters that don't affect the state of the
        # pipeline. Not excluding `OutputLOCAL.PARAM_CACHE`, because if
        # it has changed, we might not have that output in our cache.
        checksum = dict_md5(
            d, exclude=[self.PARAM_LOCKED, OutputLOCAL.PARAM_METRIC])
        logger.debug(
            "Computed stage '{}' md5: '{}'".format(self.relpath, checksum))
        return checksum
Example #11
0
    def _collect_dir_cache(self,
                           out,
                           branch=None,
                           remote=None,
                           force=False,
                           jobs=None):
        """Collect used-cache entries for a directory output.

        Tries to pull the directory's cache file if it is missing locally;
        if it still can't be found, asks the user (unless ``force``)
        whether to continue without the per-file entries.

        Returns:
            list: checksum info dicts for the directory and its files.

        Raises:
            DvcException: if the user declines to continue.
        """
        info = out.dumpd()
        ret = [info]
        r = out.remote
        md5 = info[r.PARAM_CHECKSUM]

        if self.cache.local.changed_cache_file(md5):
            try:
                self.cloud.pull(ret,
                                jobs=jobs,
                                remote=remote,
                                show_checksums=False)
            except DvcException as exc:
                msg = "Failed to pull cache for '{}': {}"
                logger.debug(msg.format(out, exc))

        if self.cache.local.changed_cache_file(md5):
            msg = ("Missing cache for directory '{}'. "
                   "Cache for files inside will be lost. "
                   "Would you like to continue? Use '-f' to force. ")
            # NOTE: the '{}' placeholder was previously never filled in,
            # so the user was prompted with a literal '{}'.
            if not force and not prompt.confirm(msg.format(out)):
                raise DvcException(
                    "unable to fully collect used cache"
                    " without cache for directory '{}'".format(out))
            else:
                return ret

        for i in self.cache.local.load_dir_cache(md5):
            i["branch"] = branch
            i[r.PARAM_PATH] = os.path.join(info[r.PARAM_PATH],
                                           i[r.PARAM_RELPATH])
            ret.append(i)

        return ret
Example #12
0
    def upload(self, from_infos, to_infos, names=None):
        """Copy each local source file to its local destination path."""
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            # Only local-to-local transfers are supported here.
            if to_info["scheme"] != "local":
                raise NotImplementedError

            if from_info["scheme"] != "local":
                raise NotImplementedError

            src = from_info["path"]
            dst = to_info["path"]

            logger.debug("Uploading '{}' to '{}'".format(src, dst))

            name = name or os.path.basename(src)

            # Make sure the destination directory exists.
            makedirs(os.path.dirname(dst), exist_ok=True)

            try:
                copyfile(src, dst, name=name)
            except Exception:
                logger.error("failed to upload '{}' to '{}'".format(src, dst))
Example #13
0
    def upload(self, from_infos, to_infos, names=None):
        """Copy local source files to their local destination paths."""
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            # Both ends of the transfer must be local paths.
            if to_info['scheme'] != 'local':
                raise NotImplementedError

            if from_info['scheme'] != 'local':
                raise NotImplementedError

            src, dst = from_info['path'], to_info['path']

            logger.debug("Uploading '{}' to '{}'".format(src, dst))

            if not name:
                name = os.path.basename(src)

            # Make sure the destination directory exists.
            self._makedirs(dst)

            try:
                copyfile(src, dst, name=name)
            except Exception:
                logger.error("failed to upload '{}' to '{}'".format(src, dst))
Example #14
0
    def ssh(self, host=None, user=None, port=None):
        """Open and return a paramiko SSH connection to ``host``."""
        logger.debug("Establishing ssh connection with '{}' "
                     "through port '{}' as user '{}'".format(host, port, user))

        client = paramiko.SSHClient()
        client.load_system_host_keys()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        # Prompt for credentials only once, and only when configured to.
        if self.ask_password and self.password is None:
            self.password = prompt.password(
                ("Enter a private key passphrase or a password for "
                 "host '{}' port '{}' user '{}'").format(host, port, user))

        client.connect(host,
                       username=user,
                       port=port,
                       key_filename=self.keyfile,
                       timeout=self.timeout,
                       password=self.password)

        return client
Example #15
0
    def _unprotect_file(path):
        """Turn a linked, read-only cache file at ``path`` into an
        independent writable copy.
        """
        if not (System.is_symlink(path) or System.is_hardlink(path)):
            logger.debug("Skipping copying for '{}', since it is not "
                         "a symlink or a hardlink.".format(path))
        else:
            logger.debug("Unprotecting '{}'".format(path))
            tmp = os.path.join(os.path.dirname(path), "." + str(uuid.uuid4()))

            # Copy under a temporary name first and then swap it in, so a
            # concurrent reader of `path` never observes a partial file.
            copyfile(
                path,
                tmp,
                name="Unprotecting '{}'".format(os.path.relpath(path)),
            )
            remove(path)
            os.rename(tmp, path)

        # Grant owner write permission on the resulting file.
        os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)
Example #16
0
    def upload(self, from_infos, to_infos, names=None):
        """Upload local files to S3, reporting per-file progress."""
        names = self._verify_path_args(to_infos, from_infos, names)

        s3 = self.s3

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info["scheme"] != "s3":
                raise NotImplementedError

            if from_info["scheme"] != "local":
                raise NotImplementedError

            src = from_info["path"]
            bucket = to_info["bucket"]
            key = to_info["path"]

            logger.debug(
                "Uploading '{}' to '{}/{}'".format(src, bucket, key)
            )

            name = name or os.path.basename(src)

            # The callback needs the total size to report percentages.
            cb = Callback(name, os.path.getsize(src))

            try:
                s3.upload_file(src, bucket, key, Callback=cb)
            except Exception:
                logger.error("failed to upload '{}'".format(src))
                continue

            progress.finish_target(name)
Example #17
0
File: gs.py Project: ei-grad/dvc
    def upload(self, from_infos, to_infos, names=None):
        """Upload local files to Google Cloud Storage."""
        names = self._verify_path_args(to_infos, from_infos, names)

        gs = self.gs

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info["scheme"] != "gs":
                raise NotImplementedError

            if from_info["scheme"] != "local":
                raise NotImplementedError

            src = from_info["path"]
            bucket_name = to_info["bucket"]
            key = to_info["path"]

            logger.debug(
                "Uploading '{}' to '{}/{}'".format(src, bucket_name, key)
            )

            name = name or os.path.basename(src)

            # No byte-level progress from this API; just mark the target
            # as started.
            progress.update_target(name, 0, None)

            try:
                gs.bucket(bucket_name).blob(key).upload_from_filename(src)
            except Exception:
                msg = "failed to upload '{}' to '{}/{}'"
                logger.error(msg.format(src, bucket_name, key))
                continue

            progress.finish_target(name)
Example #18
0
 def blob_service(self):
     """Lazily build, memoize and return the Azure blob service client."""
     if self.__blob_service is not None:
         return self.__blob_service

     logger.debug('URL {}'.format(self.url))
     logger.debug('Connection string {}'.format(self.connection_string))
     self.__blob_service = BlockBlobService(
         connection_string=self.connection_string)
     logger.debug('Container name {}'.format(self.bucket))
     # Make sure the container exists before first use.
     self.__blob_service.create_container(self.bucket)
     return self.__blob_service
Example #19
0
 def blob_service(self):
     """Lazily create the Azure blob service client, creating the target
     container only when it does not already exist.
     """
     if self.__blob_service is not None:
         return self.__blob_service

     logger.debug("URL {}".format(self.url))
     logger.debug("Connection string {}".format(self.connection_string))
     self.__blob_service = BlockBlobService(
         connection_string=self.connection_string
     )
     logger.debug("Container name {}".format(self.bucket))
     try:  # verify that container exists
         self.__blob_service.list_blobs(
             self.bucket, delimiter="/", num_results=1
         )
     except AzureMissingResourceHttpError:
         self.__blob_service.create_container(self.bucket)
     return self.__blob_service
Example #20
0
    def link(self, cache, path):
        """Link ``cache`` into the workspace at ``path``.

        Tries each configured cache type in order, permanently dropping
        types that turn out to be unsupported on this filesystem.

        Raises:
            DvcException: if no cache type could create the link.
        """
        assert os.path.isfile(cache)

        dname = os.path.dirname(path)
        if not os.path.exists(dname):
            os.makedirs(dname)

        # NOTE: just create an empty file for an empty cache
        if os.path.getsize(cache) == 0:
            open(path, "w+").close()

            msg = "Created empty file: {} -> {}".format(cache, path)
            logger.debug(msg)
            return

        # NOTE: the previous counter-based loop was redundant — every
        # failure already removes the head of `cache_types`, so simply loop
        # while candidates remain.
        while self.cache_types:
            try:
                self.CACHE_TYPE_MAP[self.cache_types[0]](cache, path)

                if self.protected:
                    os.chmod(path, stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH)

                msg = "Created {}'{}': {} -> {}".format(
                    "protected " if self.protected else "",
                    self.cache_types[0],
                    cache,
                    path,
                )

                logger.debug(msg)
                return

            except DvcException as exc:
                msg = "Cache type '{}' is not supported: {}"
                logger.debug(msg.format(self.cache_types[0], str(exc)))
                del self.cache_types[0]

        raise DvcException("no possible cache types left to try out.")
Example #21
0
    def test_debug(self):
        # Debug messages are emitted only inside the verbose() context and
        # are expected to carry the "Debug: " prefix.
        with logger.verbose():
            logger.debug("message")

        self.assertEqual(self.stdout, "Debug: message\n")
Example #22
0
def percent_cb(name, complete, total):
    """Report transfer progress for ``name`` and update its progress bar."""
    transferred = sizeof_fmt(complete)
    total_size = sizeof_fmt(total)
    logger.debug(
        "{}: {} transferred out of {}".format(name, transferred, total_size))
    progress.update_target(name, complete, total)
Example #23
0
 def _is_outdated_file(self):
     """Return True when the updater file is older than TIMEOUT seconds."""
     # NOTE: getmtime returns the modification time (the old local name
     # `ctime` was misleading).
     mtime = os.path.getmtime(self.updater_file)
     outdated = (time.time() - mtime >= self.TIMEOUT)
     if outdated:
         # NOTE: fixed a stray '(' typo in the log message.
         logger.debug("'{}' is outdated.".format(self.updater_file))
     return outdated
Example #24
0
    def _do_update(self, path):
        """
        Make sure the stored info for the given path is up to date.

        Returns:
            tuple: ``(md5, info)`` for ``path``; ``(None, None)`` when the
            path doesn't exist. ``info`` is None when the cached row was
            still valid (nothing was re-collected).
        """
        if not os.path.exists(path):
            return (None, None)

        # Current filesystem state, used to validate the cached row.
        actual_mtime, actual_size = self._mtime_and_size(path)
        actual_inode = self._inode(path)

        # Look up the cached entry by inode.
        cmd = "SELECT * from {} WHERE inode={}".format(
            self.STATE_TABLE, self._to_sqlite(actual_inode)
        )

        self._execute(cmd)
        ret = self._fetchall()
        if not ret:
            # No row yet: hash the file and insert a fresh entry.
            md5, info = self._collect(path)
            cmd = (
                "INSERT INTO {}(inode, mtime, size, md5, timestamp) "
                'VALUES ({}, "{}", "{}", "{}", "{}")'
            )
            self._execute(
                cmd.format(
                    self.STATE_TABLE,
                    self._to_sqlite(actual_inode),
                    actual_mtime,
                    actual_size,
                    md5,
                    int(nanotime.timestamp(time.time())),
                )
            )
            self.inserts += 1
        else:
            # The inode is effectively the key, so at most one row matches.
            assert len(ret) == 1
            assert len(ret[0]) == 5
            inode, mtime, size, md5, _ = ret[0]
            inode = self._from_sqlite(inode)
            assert inode == actual_inode
            logger.debug(
                "Inode '{}', mtime '{}', actual mtime '{}', size '{}', "
                "actual size '{}'.".format(
                    inode, mtime, actual_mtime, size, actual_size
                )
            )
            if actual_mtime != mtime or actual_size != size:
                # File changed since it was cached: re-hash and update the row.
                md5, info = self._collect(path)
                cmd = (
                    "UPDATE {} SET "
                    'mtime = "{}", size = "{}", '
                    'md5 = "{}", timestamp = "{}" '
                    "WHERE inode = {}"
                )
                self._execute(
                    cmd.format(
                        self.STATE_TABLE,
                        actual_mtime,
                        actual_size,
                        md5,
                        int(nanotime.timestamp(time.time())),
                        self._to_sqlite(actual_inode),
                    )
                )
            else:
                # Row still valid: only refresh the timestamp (presumably
                # used for state-db housekeeping — confirm against callers).
                info = None
                cmd = 'UPDATE {} SET timestamp = "{}" WHERE inode = {}'
                self._execute(
                    cmd.format(
                        self.STATE_TABLE,
                        int(nanotime.timestamp(time.time())),
                        self._to_sqlite(actual_inode),
                    )
                )

        return (md5, info)
Example #25
0
 def _inode(path):
     """Return the filesystem inode number for ``path``."""
     # NOTE: compute the inode once instead of calling System.inode() twice.
     inode = System.inode(path)
     logger.debug("Path {} inode {}".format(path, inode))
     return inode
Example #26
0
 def _fetchall(self):
     """Fetch, log and return all rows pending on the state db cursor."""
     rows = self.cursor.fetchall()
     logger.debug("fetched: {}".format(rows))
     return rows
Example #27
0
 def _execute(self, cmd):
     # Log every SQL statement before running it on the state db cursor.
     logger.debug(cmd)
     return self.cursor.execute(cmd)
Example #28
0
    def changed(self, path_info, checksum_info):
        """Checks if data has changed.

        A file is considered changed if:
            - It doesn't exist on the working directory (was unlinked)
            - Checksum is not computed (saving a new file)
            - The checksum stored in the State is different from the given one
            - There's no file in the cache

        Args:
            path_info: dict with path information.
            checksum_info: dict with the expected checksum for this data.

        Returns:
            bool: True if data has changed, False otherwise.
        """

        logger.debug("checking if '{}'('{}') has changed.".format(
            path_info, checksum_info))

        # Unlinked from the workspace?
        if not self.exists(path_info):
            logger.debug("'{}' doesn't exist.".format(path_info))
            return True

        # Never saved before?
        checksum = checksum_info.get(self.PARAM_CHECKSUM)
        if checksum is None:
            logger.debug("checksum for '{}' is missing.".format(path_info))
            return True

        # Missing/invalid in the cache?
        if self.changed_cache(checksum):
            logger.debug("cache for '{}'('{}') has changed.".format(
                path_info, checksum))
            return True

        # Current content differs from the recorded checksum?
        actual = self.save_info(path_info)[self.PARAM_CHECKSUM]
        if checksum != actual:
            logger.debug(
                "checksum '{}'(actual '{}') for '{}' has changed.".format(
                    checksum, actual, path_info))
            return True

        logger.debug("'{}' hasn't changed.".format(path_info))
        return False
Example #29
0
 def inode(path):
     """Return the filesystem inode number for ``path``."""
     # NOTE: compute the inode once instead of calling System.inode() twice.
     result = System.inode(path)
     logger.debug('Path {} inode {}'.format(path, result))
     return result
Example #30
0
File: pkg.py Project: rpip/dvc
    def install_or_update(self, parent_repo, pkg_params):
        """Install a DVC package into the modules dir of ``parent_repo``.

        Clones the package into a temporary git repo, fetches the selected
        outputs, persists them into a module directory, writes a stage
        file, and checks it out in the parent repo.

        Args:
            parent_repo: the repo the package is installed into.
            pkg_params: parsed package parameters (address, select, file,
                target_dir).

        Raises:
            DvcException: when not run from the repository root, when the
                package name can't be derived from the address, or when
                the stage file can't be created.
        """
        from git.cmd import Git

        if not self.is_in_root():
            raise DvcException(
                "This command can be run only from a repository root"
            )

        if not os.path.exists(self.MODULES_DIR):
            logger.debug("Creating modules dir {}".format(self.MODULES_DIR))
            os.makedirs(self.MODULES_DIR)
            # Keep the modules dir out of scm tracking in the parent repo.
            parent_repo.scm.ignore(os.path.abspath(self.MODULES_DIR))

        # The package name is the last path component of the polished URL.
        module_name = (
            Git.polish_url(pkg_params.address).strip("/").split("/")[-1]
        )
        if not module_name:
            raise DvcException(
                "Package address error: unable to extract package name"
            )

        with TempGitRepo(
            pkg_params.address, module_name, Package.MODULES_DIR
        ) as tmp_repo:
            outputs_to_copy = tmp_repo.outs
            if pkg_params.select:
                # Narrow down to only the explicitly selected outputs.
                outputs_to_copy = list(
                    filter(
                        lambda out: out.dvc_path in pkg_params.select,
                        outputs_to_copy,
                    )
                )

            # Fetch only the stages that back the outputs we will copy.
            fetched_stage_files = set(
                map(lambda o: o.stage.path, outputs_to_copy)
            )
            tmp_repo.fetch(fetched_stage_files)

            module_dir = self.create_module_dir(module_name)
            tmp_repo.persist_to(module_dir, parent_repo)

            dvc_file = self.get_dvc_file_name(
                pkg_params.file, pkg_params.target_dir, module_name
            )
            try:
                self.persist_stage_and_scm_state(
                    parent_repo,
                    outputs_to_copy,
                    pkg_params.target_dir,
                    dvc_file,
                )
            except Exception as ex:
                raise DvcException(
                    "Package '{}' was installed "
                    "but stage file '{}' "
                    "was not created properly: {}".format(
                        pkg_params.address, dvc_file, ex
                    )
                )

        parent_repo.checkout(dvc_file)