Example #1
0
    def upload(self, from_infos, to_infos, names=None):
        """Upload local files to blobs on this remote.

        ``from_infos[i]`` (scheme ``'local'``) is sent to the bucket/key
        given by ``to_infos[i]`` (scheme ``self.scheme``). ``names`` holds
        the labels handed to the progress callback; an empty label falls
        back to the basename of the source path.

        A transfer that raises is logged as a warning and skipped.

        Raises:
            NotImplementedError: if an entry uses an unsupported scheme.
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        for src, dst, label in zip(from_infos, to_infos, names):
            if dst['scheme'] != self.scheme:
                raise NotImplementedError

            if src['scheme'] != 'local':
                raise NotImplementedError

            bucket_name = dst['bucket']
            blob_key = dst['key']
            local_path = src['path']

            logger.debug("Uploading '{}' to '{}/{}'".format(
                local_path, bucket_name, blob_key))

            label = label or os.path.basename(local_path)
            progress_cb = Callback(label)

            try:
                self.blob_service.create_blob_from_path(
                    bucket_name,
                    blob_key,
                    local_path,
                    progress_callback=progress_cb)
            except Exception as ex:
                msg = "Failed to upload '{}'".format(local_path)
                logger.warn(msg, ex)
                continue

            # Only reached when the transfer did not raise.
            progress.finish_target(label)
Example #2
0
    def checkout(self, path_info, checksum_info):
        """Recreate the data at ``path_info`` from this remote's cache.

        Returns without copying when ``checksum_info`` has no md5, when
        ``self.changed`` reports the data as unchanged, or when
        ``self.changed_cache`` reports the cache entry as changed.
        Otherwise any data already present at ``path_info`` is removed and
        the cache entry is copied into place with ``self.cp``.

        Raises:
            NotImplementedError: if ``path_info`` is not an ``ssh`` path.
        """
        if path_info['scheme'] != 'ssh':
            raise NotImplementedError

        md5 = checksum_info.get(self.PARAM_MD5, None)
        if not md5:
            return

        if not self.changed(path_info, checksum_info):
            msg = "Data '{}' didn't change."
            logger.info(msg.format(self.to_string(path_info)))
            return

        if self.changed_cache(md5):
            msg = "Cache '{}' not found. File '{}' won't be created."
            logger.warn(msg.format(md5, self.to_string(path_info)))
            return

        if self.exists([path_info])[0]:
            msg = "Data '{}' exists. Removing before checkout."
            logger.warn(msg.format(self.to_string(path_info)))
            # Fall through after removing: the stale data must be replaced
            # by the cached copy below, not just deleted.
            self.remove(path_info)

        msg = "Checking out '{}' with cache '{}'."
        logger.info(msg.format(self.to_string(path_info), md5))

        # The cache entry lives on the same host; only the path differs.
        src = path_info.copy()
        src['path'] = posixpath.join(self.prefix, md5[0:2], md5[2:])

        self.cp(src, path_info)
Example #3
0
    def checkout(self, path_info, checksum_info):
        """Recreate the object at ``path_info`` from this remote's cache.

        Returns without copying when ``checksum_info`` has no etag, when
        ``self.changed`` reports the data as unchanged, or when
        ``self.changed_cache`` reports the cache entry as changed.
        Otherwise any object already present at ``path_info`` is removed
        and the cache entry is copied into place with ``self._copy``.

        Raises:
            NotImplementedError: if ``path_info`` is not an ``s3`` path.
        """
        if path_info['scheme'] != 's3':
            raise NotImplementedError

        etag = checksum_info.get(self.PARAM_ETAG, None)
        if not etag:
            return

        if not self.changed(path_info, checksum_info):
            msg = "Data '{}' didn't change."
            logger.info(msg.format(self.to_string(path_info)))
            return

        if self.changed_cache(etag):
            msg = "Cache '{}' not found. File '{}' won't be created."
            logger.warn(msg.format(etag, self.to_string(path_info)))
            return

        if self.exists([path_info])[0]:
            msg = "Data '{}' exists. Removing before checkout."
            logger.warn(msg.format(self.to_string(path_info)))
            # Fall through after removing: the stale object must be
            # replaced by the cached copy below, not just deleted.
            self.remove(path_info)

        msg = "Checking out '{}' with cache '{}'."
        logger.info(msg.format(self.to_string(path_info), etag))

        key = posixpath.join(self.prefix, etag[0:2], etag[2:])
        from_info = {'scheme': 's3', 'bucket': self.bucket, 'key': key}

        self._copy(from_info, path_info)
Example #4
0
    def download(
            self, from_infos, to_infos, no_progress_bar=False, names=None):
        """Copy local files from ``from_infos`` to ``to_infos``.

        Each file is first copied to a temporary path obtained from
        ``self.tmp_file`` and then renamed onto its destination. A copy
        that raises is logged as a warning and skipped.

        Raises:
            NotImplementedError: if either side of a pair is not ``local``.
        """
        names = self._verify_path_args(from_infos, to_infos, names)

        for dst, src, label in zip(to_infos, from_infos, names):
            if src['scheme'] != 'local':
                raise NotImplementedError

            if dst['scheme'] != 'local':
                raise NotImplementedError

            logger.debug("Downloading '{}' to '{}'".format(
                src['path'], dst['path']))

            label = label or os.path.basename(dst['path'])

            self._makedirs(dst['path'])
            staging_path = self.tmp_file(dst['path'])
            try:
                copyfile(src['path'],
                         staging_path,
                         no_progress_bar=no_progress_bar,
                         name=label)
            except Exception as exc:
                msg = "Failed to download '{}' to '{}'"
                logger.warn(msg.format(src['path'], dst['path']), exc)
            else:
                # Move into place only after the copy completed.
                os.rename(staging_path, dst['path'])
0
    def _init_cloud(self, cloud_config, cloud_type):
        """Build and return a ``cloud_type`` instance for this project.

        Logs a warning first when the core config has a value for
        ``Config.SECTION_CORE_STORAGEPATH``.
        """
        if self._core.get(Config.SECTION_CORE_STORAGEPATH):
            logger.warn('Using obsoleted config format. Consider updating.')

        return cloud_type(self.project, cloud_config)
Example #6
0
    def checkout(self, path_info, checksum_info):
        """Recreate the data at ``path_info`` from this remote's cache.

        Returns without copying when ``checksum_info`` has no checksum,
        when ``self.changed`` reports the data as unchanged, or when
        ``self.changed_cache`` reports the cache entry as changed.
        Otherwise any data already present at ``path_info`` is removed and
        the cache entry is copied into place with ``self.cp``.

        Raises:
            NotImplementedError: if ``path_info`` is not an ``hdfs`` path.
            AssertionError: if ``path_info`` has no ``url``.
        """
        if path_info['scheme'] != 'hdfs':
            raise NotImplementedError

        assert path_info.get('url')

        checksum = checksum_info.get(self.PARAM_CHECKSUM, None)
        if not checksum:
            return

        if not self.changed(path_info, checksum_info):
            msg = "Data '{}' didn't change."
            logger.info(msg.format(self.to_string(path_info)))
            return

        if self.changed_cache(checksum):
            msg = "Cache '{}' not found. File '{}' won't be created."
            logger.warn(msg.format(checksum, self.to_string(path_info)))
            return

        if self.exists([path_info])[0]:
            msg = "Data '{}' exists. Removing before checkout."
            logger.warn(msg.format(self.to_string(path_info)))
            # Fall through after removing: the stale data must be replaced
            # by the cached copy below, not just deleted.
            self.remove(path_info)

        msg = "Checking out '{}' with cache '{}'."
        logger.info(msg.format(self.to_string(path_info), checksum))

        # Same connection details as the target; only the url differs.
        src = path_info.copy()
        src['url'] = posixpath.join(self.url, checksum[0:2], checksum[2:])

        self.cp(src, path_info)
Example #7
0
    def upload(self, from_infos, to_infos, names=None):
        """Upload local files to S3 objects.

        ``from_infos[i]`` (scheme ``'local'``) is sent to the bucket/key
        given by ``to_infos[i]`` (scheme ``'s3'``). The file size is passed
        to the progress callback. A transfer that raises is logged as a
        warning and skipped.

        Raises:
            NotImplementedError: if an entry uses an unsupported scheme.
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        # Fetch the client once and reuse it for every file.
        client = self.s3

        for src, dst, label in zip(from_infos, to_infos, names):
            if dst['scheme'] != 's3':
                raise NotImplementedError

            if src['scheme'] != 'local':
                raise NotImplementedError

            logger.debug("Uploading '{}' to '{}/{}'".format(
                src['path'], dst['bucket'], dst['key']))

            label = label or os.path.basename(src['path'])

            size = os.path.getsize(src['path'])
            progress_cb = Callback(label, size)

            try:
                client.upload_file(src['path'],
                                   dst['bucket'],
                                   dst['key'],
                                   Callback=progress_cb)
            except Exception as exc:
                msg = "Failed to upload '{}'".format(src['path'])
                logger.warn(msg, exc)
            else:
                progress.finish_target(label)
Example #8
0
    def supported(cls, config):
        """Return whether this class can serve the remote in ``config``.

        The result is True only when ``cls.match`` accepts the configured
        URL and every value in ``cls.REQUIRES`` is truthy. When the URL
        matches but a requirement is not satisfied, a warning with
        installation hints is logged before returning False.
        """
        url = config[Config.SECTION_REMOTE_URL]
        url_ok = cls.match(url) is not None
        deps_ok = all(cls.REQUIRES.values())

        if not url_ok:
            return False

        if deps_ok:
            return True

        missing = [dep for dep, mod in cls.REQUIRES.items() if mod is None]
        template = (
            "URL '{}' is supported but requires these missing "
            "dependencies: {}. If you have installed dvc using pip, "
            "choose one of these options to proceed: \n"
            "\n"
            "    1) Install specific missing dependencies:\n"
            "        pip install {}\n"
            "    2) Install dvc package that includes those missing "
            "dependencies: \n"
            "        pip install dvc[{}]\n"
            "    3) Install dvc package with all possible "
            "dependencies included: \n"
            "        pip install dvc[all]\n"
            "\n"
            "If you have installed dvc from a binary package and you "
            "are still seeing this message, please report it to us "
            "using https://github.com/iterative/dvc/issues. Thank you!"
        )
        logger.warn(
            template.format(url, missing, " ".join(missing), cls.scheme))

        return False
Example #9
0
 def changed_cache_file(self, md5):
     """Return True when the cache file for ``md5`` is reported changed.

     If ``self.state.changed`` flags the file and it exists on disk, a
     warning is logged and the file is removed.
     """
     cache_path = self.get(md5)
     if not self.state.changed(cache_path, md5=md5):
         return False

     if os.path.exists(cache_path):
         msg = 'Corrupted cache file {}.'
         logger.warn(msg.format(os.path.relpath(cache_path)))
         remove(cache_path)

     return True
Example #10
0
    def download(
            self, from_infos, to_infos, no_progress_bar=False, names=None):
        """Download S3 objects to local files, or copy them within S3.

        An ``s3`` destination is handled by ``self._copy``. A ``local``
        destination is downloaded to a temporary path from
        ``self.tmp_file`` and then renamed into place. Unless
        ``no_progress_bar`` is set, the object's ``ContentLength`` is
        looked up and passed to the progress callback. A download that
        raises is logged as a warning and skipped.

        Raises:
            NotImplementedError: if a source is not ``s3`` or a
                destination is neither ``s3`` nor ``local``.
        """
        names = self._verify_path_args(from_infos, to_infos, names)

        # Fetch the client once and reuse it for every object.
        client = self.s3

        for dst, src, label in zip(to_infos, from_infos, names):
            if src['scheme'] != 's3':
                raise NotImplementedError

            if dst['scheme'] == 's3':
                self._copy(src, dst, s3=client)
                continue

            if dst['scheme'] != 'local':
                raise NotImplementedError

            logger.debug("Downloading '{}/{}' to '{}'".format(
                src['bucket'], src['key'], dst['path']))

            staging_path = self.tmp_file(dst['path'])
            label = label or os.path.basename(dst['path'])

            self._makedirs(dst['path'])

            try:
                progress_cb = None
                if not no_progress_bar:
                    head = client.head_object(Bucket=src['bucket'],
                                              Key=src['key'])
                    progress_cb = Callback(label, head['ContentLength'])

                client.download_file(src['bucket'],
                                     src['key'],
                                     staging_path,
                                     Callback=progress_cb)
            except Exception as exc:
                msg = "Failed to download '{}/{}'".format(
                    src['bucket'], src['key'])
                logger.warn(msg, exc)
                continue

            os.rename(staging_path, dst['path'])

            if not no_progress_bar:
                progress.finish_target(label)
Example #11
0
    def _cloud(self):
        """Return the cloud object selected by the core config, or None.

        A non-empty ``Config.SECTION_CORE_REMOTE`` value is passed to
        ``self._init_remote``. Otherwise, if ``Config.SECTION_CORE_CLOUD``
        is set, a warning is logged and ``self._init_compat()`` is used.
        """
        remote_name = self._core.get(Config.SECTION_CORE_REMOTE, '')
        if remote_name != '':
            return self._init_remote(remote_name)

        if not self._core.get(Config.SECTION_CORE_CLOUD, None):
            return None

        # backward compatibility
        logger.warn('Using obsoleted config format. Consider updating.')
        return self._init_compat()
Example #12
0
    def changed_cache(self, etag):
        """Return True when the cache object for ``etag`` does not match.

        The cache key is ``<prefix>/<etag[:2]>/<etag[2:]>`` in
        ``self.bucket``. When ``self.save_info`` for that object differs
        from the expected etag and the object exists, a warning is logged
        and the object is removed.
        """
        cache_key = posixpath.join(self.prefix, etag[0:2], etag[2:])
        cache_info = {
            'scheme': 's3',
            'bucket': self.bucket,
            'key': cache_key,
        }

        expected = {self.PARAM_ETAG: etag}
        mismatch = expected != self.save_info(cache_info)
        if not mismatch:
            return False

        if self.exists([cache_info])[0]:
            msg = 'Corrupted cache file {}'
            logger.warn(msg.format(self.to_string(cache_info)))
            self.remove(cache_info)

        return True
Example #13
0
    def changed_cache(self, checksum):
        """Return True when the cache entry for ``checksum`` does not match.

        The cache url is ``<self.url>/<checksum[:2]>/<checksum[2:]>``.
        When ``self.save_info`` for that entry differs from the expected
        checksum and the entry exists, a warning is logged and the entry
        is removed.
        """
        cache_info = {
            'scheme': 'hdfs',
            'user': self.user,
            'url': posixpath.join(self.url, checksum[0:2], checksum[2:]),
        }

        expected = {self.PARAM_CHECKSUM: checksum}
        mismatch = expected != self.save_info(cache_info)
        if not mismatch:
            return False

        if self.exists([cache_info])[0]:
            msg = 'Corrupted cache file {}'
            logger.warn(msg.format(self.to_string(cache_info)))
            self.remove(cache_info)

        return True
Example #14
0
    def download(self,
                 from_infos,
                 to_infos,
                 no_progress_bar=False,
                 names=None):
        """Download files over SSH, or copy them on the remote host.

        One SSH connection is opened using the host/user/port of the first
        source and reused for every pair. An ``ssh`` destination is handled
        by ``self.cp``. A ``local`` destination is fetched over SFTP into a
        temporary path from ``self.tmp_file`` and then renamed into place.
        A download that raises is logged as a warning and skipped.

        A single SFTP session is opened on first use and shared by all
        files. It and the SSH connection are closed even when an exception
        escapes the loop. Progress reporting is skipped when
        ``no_progress_bar`` is true.

        Raises:
            NotImplementedError: if a source is not ``ssh`` or a
                destination is neither ``ssh`` nor ``local``.
            AssertionError: if an ssh-to-ssh pair differs in host, port
                or user.
        """
        names = self._verify_path_args(from_infos, to_infos, names)

        # Nothing to transfer: avoid indexing an empty list and opening a
        # connection for no work.
        if not from_infos:
            return

        ssh = self.ssh(host=from_infos[0]['host'],
                       user=from_infos[0]['user'],
                       port=from_infos[0]['port'])
        sftp = None

        try:
            for to_info, from_info, name in zip(to_infos, from_infos, names):
                if from_info['scheme'] != 'ssh':
                    raise NotImplementedError

                if to_info['scheme'] == 'ssh':
                    assert from_info['host'] == to_info['host']
                    assert from_info['port'] == to_info['port']
                    assert from_info['user'] == to_info['user']
                    self.cp(from_info, to_info, ssh=ssh)
                    continue

                if to_info['scheme'] != 'local':
                    raise NotImplementedError

                msg = "Downloading '{}/{}' to '{}'".format(from_info['host'],
                                                           from_info['path'],
                                                           to_info['path'])
                logger.debug(msg)

                if not name:
                    name = os.path.basename(to_info['path'])

                self._makedirs(to_info['path'])
                tmp_file = self.tmp_file(to_info['path'])
                try:
                    # Opened lazily so ssh-to-ssh-only runs never create an
                    # SFTP channel, and kept inside the try so a failure to
                    # open is reported like any other download failure.
                    if sftp is None:
                        sftp = ssh.open_sftp()
                    callback = None if no_progress_bar else create_cb(name)
                    sftp.get(from_info['path'], tmp_file, callback=callback)
                except Exception as exc:
                    msg = "Failed to download '{}/{}' to '{}'"
                    logger.warn(
                        msg.format(from_info['host'], from_info['path'],
                                   to_info['path']), exc)
                    continue

                os.rename(tmp_file, to_info['path'])

                if not no_progress_bar:
                    progress.finish_target(name)
        finally:
            if sftp is not None:
                sftp.close()
            ssh.close()
Example #15
0
    def changed_cache(self, md5):
        """Return True when the cache file for ``md5`` does not match.

        The cache path is ``<prefix>/<md5[:2]>/<md5[2:]>`` on this
        remote's host. When ``self.save_info`` for that file differs from
        the expected md5 and the file exists, a warning is logged and the
        file is removed.
        """
        cache_info = {
            'scheme': 'ssh',
            'host': self.host,
            'port': self.port,
            'user': self.user,
            'path': posixpath.join(self.prefix, md5[0:2], md5[2:]),
        }

        expected = {self.PARAM_MD5: md5}
        mismatch = expected != self.save_info(cache_info)
        if not mismatch:
            return False

        if self.exists([cache_info])[0]:
            msg = 'Corrupted cache file {}'
            logger.warn(msg.format(self.to_string(cache_info)))
            self.remove(cache_info)

        return True
Example #16
0
    def download(
            self, from_infos, to_infos, no_progress_bar=False, names=None):
        """Download ``http``/``https`` URLs to local files.

        Each URL is fetched with ``self._download_to`` into a temporary
        path from ``self.tmp_file`` and then renamed into place. A
        progress callback is used only when ``no_progress_bar`` is false
        and ``self._content_length`` returned a truthy size. A download
        that raises is logged as a warning and skipped.

        Raises:
            NotImplementedError: if a source is not ``http``/``https`` or
                a destination is not ``local``.
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        for dst, src, label in zip(to_infos, from_infos, names):
            if src['scheme'] not in ['http', 'https']:
                raise NotImplementedError

            if dst['scheme'] != 'local':
                raise NotImplementedError

            logger.debug("Downloading '{}' to '{}'".format(
                src['url'], dst['path']))

            staging_path = self.tmp_file(dst['path'])
            label = label or os.path.basename(dst['path'])

            self._makedirs(dst['path'])

            size = self._content_length(src['url'])

            progress_cb = None
            if not no_progress_bar and size:
                progress_cb = ProgressBarCallback(label, size)

            try:
                self._download_to(src['url'],
                                  staging_path,
                                  callback=progress_cb)
            except Exception as exc:
                msg = "Failed to download '{}'".format(src['url'])
                logger.warn(msg, exc)
                continue

            os.rename(staging_path, dst['path'])

            if not no_progress_bar:
                progress.finish_target(label)
Example #17
0
    def download(
            self, from_infos, to_infos, no_progress_bar=False, names=None):
        """Download blobs from this remote to local files.

        Each blob is fetched with ``self.blob_service.get_blob_to_path``
        into a temporary path from ``self.tmp_file`` and then renamed into
        place. A download that raises is logged as a warning and skipped.

        Raises:
            NotImplementedError: if a source does not use ``self.scheme``
                or a destination is not ``local``.
        """
        names = self._verify_path_args(from_infos, to_infos, names)

        for dst, src, label in zip(to_infos, from_infos, names):
            if src['scheme'] != self.scheme:
                raise NotImplementedError

            if dst['scheme'] != 'local':
                raise NotImplementedError

            bucket_name = src['bucket']
            blob_key = src['key']

            logger.debug("Downloading '{}/{}' to '{}'".format(
                bucket_name, blob_key, dst['path']))

            staging_path = self.tmp_file(dst['path'])
            label = label or os.path.basename(dst['path'])

            if no_progress_bar:
                progress_cb = None
            else:
                progress_cb = Callback(label)

            self._makedirs(dst['path'])

            try:
                self.blob_service.get_blob_to_path(
                    bucket_name,
                    blob_key,
                    staging_path,
                    progress_callback=progress_cb)
            except Exception as exc:
                msg = "Failed to download '{}/{}'".format(
                    bucket_name, blob_key)
                logger.warn(msg, exc)
                continue

            # Only reached when the transfer did not raise.
            os.rename(staging_path, dst['path'])

            if not no_progress_bar:
                progress.finish_target(label)
Example #18
0
    def upload(self, from_infos, to_infos, names=None):
        """Upload local files to a host over SFTP.

        One SSH connection and one SFTP session are opened using the
        host/user/port of the first destination and reused for every
        pair. The destination's parent directory is created with
        ``mkdir -p`` before each transfer. An upload that raises is logged
        as a warning, together with the exception, and skipped.

        The SFTP session and SSH connection are closed even when an
        exception escapes the loop.

        Raises:
            NotImplementedError: if a destination is not ``ssh`` or a
                source is not ``local``.
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        # Nothing to transfer: avoid indexing an empty list and opening a
        # connection for no work.
        if not to_infos:
            return

        ssh = self.ssh(host=to_infos[0]['host'],
                       user=to_infos[0]['user'],
                       port=to_infos[0]['port'])
        sftp = None

        try:
            sftp = ssh.open_sftp()

            for from_info, to_info, name in zip(from_infos, to_infos, names):
                if to_info['scheme'] != 'ssh':
                    raise NotImplementedError

                if from_info['scheme'] != 'local':
                    raise NotImplementedError

                logger.debug("Uploading '{}' to '{}/{}'".format(
                    from_info['path'], to_info['host'], to_info['path']))

                if not name:
                    name = os.path.basename(from_info['path'])

                # NOTE(review): dname is interpolated unquoted, so paths
                # with spaces or shell metacharacters will misbehave.
                dname = posixpath.dirname(to_info['path'])
                self._exec(ssh, 'mkdir -p {}'.format(dname))

                try:
                    sftp.put(from_info['path'],
                             to_info['path'],
                             callback=create_cb(name))
                except Exception as exc:
                    msg = "Failed to upload '{}' to '{}/{}'"
                    # exc is the second argument to logger.warn, not an
                    # extra (ignored) argument to str.format.
                    logger.warn(
                        msg.format(from_info['path'], to_info['host'],
                                   to_info['path']), exc)
                    continue

                progress.finish_target(name)
        finally:
            if sftp is not None:
                sftp.close()
            ssh.close()
Example #19
0
    def upload(self, from_infos, to_infos, names=None):
        """Copy local files from ``from_infos`` to ``to_infos``.

        The destination directory is prepared with ``self._makedirs``
        before each copy. A copy that raises is logged as a warning and
        the loop moves on to the next pair.

        Raises:
            NotImplementedError: if either side of a pair is not ``local``.
        """
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info['scheme'] != 'local':
                raise NotImplementedError

            if from_info['scheme'] != 'local':
                raise NotImplementedError

            logger.debug("Uploading '{}' to '{}'".format(
                from_info['path'], to_info['path']))

            if not name:
                name = os.path.basename(from_info['path'])

            self._makedirs(to_info['path'])

            try:
                copyfile(from_info['path'], to_info['path'], name=name)
            except Exception as exc:
                # Message previously read "tp" instead of "to".
                msg = "Failed to upload '{}' to '{}'"
                logger.warn(msg.format(from_info['path'], to_info['path']),
                            exc)
Example #20
0
    def checkout(self, path_info, checksum_info, force=False):
        """Link the cached data for ``checksum_info`` into ``path_info``.

        The md5 is read from ``checksum_info`` and resolved to a cache
        path with ``self.get``. The method returns early when there is no
        cache path, when ``self.changed`` reports the data as unchanged,
        or when ``self.changed_cache`` reports the cache entry as changed
        (in which case ``path`` is also removed).

        A non-directory cache entry is linked straight to ``path``. A
        directory cache entry is expanded: each listed file that
        ``self.changed`` flags is re-linked from its own cache entry.

        Args:
            path_info: dict with at least ``'path'`` and ``'scheme'``.
            checksum_info: dict that may hold ``self.PARAM_MD5``.
            force: when true, existing files are deleted with ``remove``
                instead of going through ``self._safe_remove``.
        """
        path = path_info['path']
        md5 = checksum_info.get(self.PARAM_MD5)
        cache = self.get(md5)

        if not cache:
            msg = 'No cache info for \'{}\'. Skipping checkout.'
            logger.warn(msg.format(os.path.relpath(path)))
            return

        if not self.changed(path_info, checksum_info):
            msg = "Data '{}' didn't change."
            logger.info(msg.format(os.path.relpath(path)))
            return

        if self.changed_cache(md5):
            msg = u'Cache \'{}\' not found. File \'{}\' won\'t be created.'
            logger.warn(msg.format(md5, os.path.relpath(path)))
            # The working copy is removed even though nothing replaces it.
            remove(path)
            return

        msg = u'Checking out \'{}\' with cache \'{}\'.'
        logger.info(msg.format(os.path.relpath(path), md5))

        # Single-file case: replace whatever is at ``path`` with a link
        # to the cache entry and record the link in the state.
        if not self.is_dir_cache(cache):
            if os.path.exists(path):
                # NOTE(review): _safe_remove presumably protects data that
                # is not already cached -- confirm against its definition.
                if force or self._already_cached(path):
                    remove(path)
                else:
                    self._safe_remove(path)

            self.link(cache, path)
            self.state.update_link(path)
            return

        # Create dir separately so that dir is created
        # even if there are no files in it
        if not os.path.exists(path):
            os.makedirs(path)

        dir_info = self.load_dir_cache(md5)
        dir_relpath = os.path.relpath(path)
        dir_size = len(dir_info)
        # Progress is only reported for directories above the threshold.
        bar = dir_size > LARGE_DIR_SIZE

        logger.info("Linking directory '{}'.".format(dir_relpath))

        for processed, entry in enumerate(dir_info):
            relpath = entry[self.PARAM_RELPATH]
            m = entry[self.PARAM_MD5]
            p = os.path.join(path, relpath)
            c = self.get(m)

            entry_info = {'scheme': path_info['scheme'], self.PARAM_PATH: p}

            entry_checksum_info = {self.PARAM_MD5: m}

            # Only entries reported as changed are re-linked.
            if self.changed(entry_info, entry_checksum_info):
                if os.path.exists(p):
                    if force or self._already_cached(p):
                        remove(p)
                    else:
                        self._safe_remove(p)

                self.link(c, p)

            if bar:
                # NOTE(review): ``processed`` is the zero-based index, so
                # the reported count lags the files handled by one.
                progress.update_target(dir_relpath, processed, dir_size)

        self._discard_working_directory_changes(path, dir_info, force=force)

        self.state.update_link(path)

        if bar:
            progress.finish_target(dir_relpath)