def upload(self, from_infos, to_infos, names=None):
    """Upload local files to the Azure blob remote, one blob per path pair."""
    names = self._verify_path_args(to_infos, from_infos, names)

    for src, dest, title in zip(from_infos, to_infos, names):
        # Only local -> azure transfers are handled here.
        if dest['scheme'] != self.scheme:
            raise NotImplementedError

        if src['scheme'] != 'local':
            raise NotImplementedError

        container, blob = dest['bucket'], dest['key']
        logger.debug("Uploading '{}' to '{}/{}'".format(
            src['path'], container, blob))

        # Fall back to the file name as the progress-bar title.
        title = title or os.path.basename(src['path'])

        try:
            self.blob_service.create_blob_from_path(
                container,
                blob,
                src['path'],
                progress_callback=Callback(title))
        except Exception as ex:
            logger.warn("Failed to upload '{}'".format(src['path']), ex)
        else:
            progress.finish_target(title)
def checkout(self, path_info, checksum_info):
    """Check out an ssh path from its md5-addressed cache entry."""
    if path_info['scheme'] != 'ssh':
        raise NotImplementedError

    md5 = checksum_info.get(self.PARAM_MD5, None)
    if not md5:
        return

    if not self.changed(path_info, checksum_info):
        logger.info(
            "Data '{}' didn't change.".format(self.to_string(path_info)))
        return

    if self.changed_cache(md5):
        logger.warn(
            "Cache '{}' not found. File '{}' won't be created.".format(
                md5, self.to_string(path_info)))
        return

    if self.exists([path_info])[0]:
        # NOTE(review): existing data is removed and the method returns;
        # the copy below only runs when the destination was absent.
        logger.warn(
            "Data '{}' exists. Removing before checkout.".format(
                self.to_string(path_info)))
        self.remove(path_info)
        return

    logger.info("Checking out '{}' with cache '{}'.".format(
        self.to_string(path_info), md5))

    # Cache layout: <prefix>/<first two md5 chars>/<rest of md5>.
    src = path_info.copy()
    src['path'] = posixpath.join(self.prefix, md5[0:2], md5[2:])

    self.cp(src, path_info)
def checkout(self, path_info, checksum_info):
    """Check out an s3 path from its ETag-addressed cache entry."""
    if path_info['scheme'] != 's3':
        raise NotImplementedError

    etag = checksum_info.get(self.PARAM_ETAG, None)
    if not etag:
        return

    if not self.changed(path_info, checksum_info):
        logger.info(
            "Data '{}' didn't change.".format(self.to_string(path_info)))
        return

    if self.changed_cache(etag):
        logger.warn(
            "Cache '{}' not found. File '{}' won't be created.".format(
                etag, self.to_string(path_info)))
        return

    if self.exists([path_info])[0]:
        # NOTE(review): existing data is removed and the method returns;
        # the copy below only runs when the destination was absent.
        logger.warn(
            "Data '{}' exists. Removing before checkout.".format(
                self.to_string(path_info)))
        self.remove(path_info)
        return

    logger.info("Checking out '{}' with cache '{}'.".format(
        self.to_string(path_info), etag))

    # Cache layout: <prefix>/<first two etag chars>/<rest of etag>.
    key = posixpath.join(self.prefix, etag[0:2], etag[2:])
    self._copy({'scheme': 's3', 'bucket': self.bucket, 'key': key},
               path_info)
def download(self, from_infos, to_infos, no_progress_bar=False, names=None):
    """Copy files from the local remote into local destinations."""
    names = self._verify_path_args(from_infos, to_infos, names)

    for dest, src, title in zip(to_infos, from_infos, names):
        if src['scheme'] != 'local':
            raise NotImplementedError

        if dest['scheme'] != 'local':
            raise NotImplementedError

        logger.debug("Downloading '{}' to '{}'".format(
            src['path'], dest['path']))

        title = title or os.path.basename(dest['path'])

        self._makedirs(dest['path'])
        tmp_file = self.tmp_file(dest['path'])

        try:
            # Copy into a temp file first so a failed copy never leaves
            # a partial file at the destination.
            copyfile(src['path'], tmp_file,
                     no_progress_bar=no_progress_bar, name=title)
        except Exception as exc:
            logger.warn("Failed to download '{}' to '{}'".format(
                src['path'], dest['path']), exc)
            continue

        os.rename(tmp_file, dest['path'])
def _init_cloud(self, cloud_config, cloud_type):
    """Instantiate *cloud_type* for this project, warning on legacy config."""
    # The global storage-path key belongs to the pre-remote config format.
    if self._core.get(Config.SECTION_CORE_STORAGEPATH):
        logger.warn('Using obsoleted config format. Consider updating.')

    return cloud_type(self.project, cloud_config)
def checkout(self, path_info, checksum_info):
    """Check out an hdfs path from its checksum-addressed cache entry."""
    if path_info['scheme'] != 'hdfs':
        raise NotImplementedError

    assert path_info.get('url')

    checksum = checksum_info.get(self.PARAM_CHECKSUM, None)
    if not checksum:
        return

    if not self.changed(path_info, checksum_info):
        logger.info(
            "Data '{}' didn't change.".format(self.to_string(path_info)))
        return

    if self.changed_cache(checksum):
        logger.warn(
            "Cache '{}' not found. File '{}' won't be created.".format(
                checksum, self.to_string(path_info)))
        return

    if self.exists([path_info])[0]:
        # NOTE(review): existing data is removed and the method returns;
        # the copy below only runs when the destination was absent.
        logger.warn(
            "Data '{}' exists. Removing before checkout.".format(
                self.to_string(path_info)))
        self.remove(path_info)
        return

    logger.info("Checking out '{}' with cache '{}'.".format(
        self.to_string(path_info), checksum))

    # Cache layout: <url>/<first two checksum chars>/<rest of checksum>.
    src = path_info.copy()
    src['url'] = posixpath.join(self.url, checksum[0:2], checksum[2:])

    self.cp(src, path_info)
def upload(self, from_infos, to_infos, names=None):
    """Upload local files to s3, one object per path pair."""
    names = self._verify_path_args(to_infos, from_infos, names)
    s3 = self.s3

    for src, dest, title in zip(from_infos, to_infos, names):
        if dest['scheme'] != 's3':
            raise NotImplementedError

        if src['scheme'] != 'local':
            raise NotImplementedError

        logger.debug("Uploading '{}' to '{}/{}'".format(
            src['path'], dest['bucket'], dest['key']))

        title = title or os.path.basename(src['path'])

        # File size is needed up-front so the callback can report progress.
        cb = Callback(title, os.path.getsize(src['path']))

        try:
            s3.upload_file(src['path'], dest['bucket'], dest['key'],
                           Callback=cb)
        except Exception as exc:
            logger.warn("Failed to upload '{}'".format(src['path']), exc)
            continue

        progress.finish_target(title)
def supported(cls, config):
    """Return True when the configured remote URL matches this class and
    all of its optional dependencies are importable.

    Presumably invoked as a classmethod (the decorator is not visible in
    this view — TODO confirm).
    """
    url = config[Config.SECTION_REMOTE_URL]
    # Does the URL scheme match this remote class at all?
    url_ok = cls.match(url) is not None
    # REQUIRES maps dependency names to imported modules; a value of None
    # means the import failed at module load time.
    deps_ok = all(cls.REQUIRES.values())
    if url_ok and not deps_ok:
        missing = [k for k, v in cls.REQUIRES.items() if v is None]
        # User-facing guidance: the URL would work, but extras are missing.
        msg = "URL \'{}\' is supported but requires these missing " \
              "dependencies: {}. If you have installed dvc using pip, " \
              "choose one of these options to proceed: \n" \
              "\n" \
              " 1) Install specific missing dependencies:\n" \
              " pip install {}\n" \
              " 2) Install dvc package that includes those missing " \
              "dependencies: \n" \
              " pip install dvc[{}]\n" \
              " 3) Install dvc package with all possible " \
              "dependencies included: \n" \
              " pip install dvc[all]\n" \
              "\n" \
              "If you have installed dvc from a binary package and you " \
              "are still seeing this message, please report it to us " \
              "using https://github.com/iterative/dvc/issues. Thank you!"
        msg = msg.format(url, missing, " ".join(missing), cls.scheme)
        logger.warn(msg)
    # Supported only when both the URL matches and dependencies are present.
    return url_ok and deps_ok
def changed_cache_file(self, md5):
    """Return True when the local cache file for *md5* is missing or stale."""
    cache = self.get(md5)

    if not self.state.changed(cache, md5=md5):
        return False

    # A cache file that exists but no longer matches its md5 is corrupted;
    # drop it so a clean copy can be fetched again.
    if os.path.exists(cache):
        logger.warn(
            'Corrupted cache file {}.'.format(os.path.relpath(cache)))
        remove(cache)

    return True
def download(self, from_infos, to_infos, no_progress_bar=False, names=None):
    """Download s3 objects to local paths (or copy s3 -> s3 server-side)."""
    names = self._verify_path_args(from_infos, to_infos, names)
    s3 = self.s3

    for dest, src, title in zip(to_infos, from_infos, names):
        if src['scheme'] != 's3':
            raise NotImplementedError

        if dest['scheme'] == 's3':
            # s3 -> s3: copy within the service, no local file involved.
            self._copy(src, dest, s3=s3)
            continue

        if dest['scheme'] != 'local':
            raise NotImplementedError

        logger.debug("Downloading '{}/{}' to '{}'".format(
            src['bucket'], src['key'], dest['path']))

        tmp_file = self.tmp_file(dest['path'])
        title = title or os.path.basename(dest['path'])
        self._makedirs(dest['path'])

        try:
            if no_progress_bar:
                cb = None
            else:
                # Object size is needed so the callback can report %.
                total = s3.head_object(Bucket=src['bucket'],
                                       Key=src['key'])['ContentLength']
                cb = Callback(title, total)

            s3.download_file(src['bucket'], src['key'], tmp_file,
                             Callback=cb)
        except Exception as exc:
            logger.warn("Failed to download '{}/{}'".format(
                src['bucket'], src['key']), exc)
            continue

        os.rename(tmp_file, dest['path'])

        if not no_progress_bar:
            progress.finish_target(title)
def _cloud(self):
    """Resolve the configured cloud, preferring core.remote over core.cloud."""
    remote = self._core.get(Config.SECTION_CORE_REMOTE, '')
    if remote != '':
        return self._init_remote(remote)

    # backward compatibility
    if self._core.get(Config.SECTION_CORE_CLOUD, None):
        logger.warn('Using obsoleted config format. Consider updating.')
        return self._init_compat()

    return None
def changed_cache(self, etag):
    """Return True when the s3 cache entry for *etag* is absent or corrupted."""
    cache = {
        'scheme': 's3',
        'bucket': self.bucket,
        'key': posixpath.join(self.prefix, etag[0:2], etag[2:]),
    }

    if self.save_info(cache) == {self.PARAM_ETAG: etag}:
        return False

    # Entry exists but its checksum no longer matches: corrupted, drop it.
    if self.exists([cache])[0]:
        logger.warn('Corrupted cache file {}'.format(self.to_string(cache)))
        self.remove(cache)

    return True
def changed_cache(self, checksum):
    """Return True when the hdfs cache entry for *checksum* is absent or corrupted."""
    cache = {
        'scheme': 'hdfs',
        'user': self.user,
        'url': posixpath.join(self.url, checksum[0:2], checksum[2:]),
    }

    if self.save_info(cache) == {self.PARAM_CHECKSUM: checksum}:
        return False

    # Entry exists but its checksum no longer matches: corrupted, drop it.
    if self.exists([cache])[0]:
        logger.warn('Corrupted cache file {}'.format(self.to_string(cache)))
        self.remove(cache)

    return True
def download(self, from_infos, to_infos, no_progress_bar=False, names=None):
    """Download files from the ssh remote to local paths.

    Fixes:
      * a single SFTP session is opened once and reused for the whole
        batch — previously ``ssh.open_sftp()`` was called inside the loop
        for every file and the channels were never closed;
      * ``no_progress_bar`` is honoured (it was previously accepted but
        ignored);
      * the connections are closed in a ``finally`` so they are not leaked
        when an unsupported scheme raises mid-loop.
    """
    names = self._verify_path_args(from_infos, to_infos, names)

    # One connection for the whole batch; all from_infos share host/user/port.
    ssh = self.ssh(host=from_infos[0]['host'],
                   user=from_infos[0]['user'],
                   port=from_infos[0]['port'])
    sftp = ssh.open_sftp()

    try:
        for to_info, from_info, name in zip(to_infos, from_infos, names):
            if from_info['scheme'] != 'ssh':
                raise NotImplementedError

            if to_info['scheme'] == 'ssh':
                # Remote-to-remote copy within the same connection.
                assert from_info['host'] == to_info['host']
                assert from_info['port'] == to_info['port']
                assert from_info['user'] == to_info['user']
                self.cp(from_info, to_info, ssh=ssh)
                continue

            if to_info['scheme'] != 'local':
                raise NotImplementedError

            msg = "Downloading '{}/{}' to '{}'".format(from_info['host'],
                                                       from_info['path'],
                                                       to_info['path'])
            logger.debug(msg)

            if not name:
                name = os.path.basename(to_info['path'])

            self._makedirs(to_info['path'])
            # Download to a temp file so a failed transfer never leaves a
            # partial file at the destination.
            tmp_file = self.tmp_file(to_info['path'])

            try:
                cb = None if no_progress_bar else create_cb(name)
                sftp.get(from_info['path'], tmp_file, callback=cb)
            except Exception as exc:
                msg = "Failed to download '{}/{}' to '{}'"
                logger.warn(
                    msg.format(from_info['host'],
                               from_info['path'],
                               to_info['path']), exc)
                continue

            os.rename(tmp_file, to_info['path'])

            if not no_progress_bar:
                progress.finish_target(name)
    finally:
        sftp.close()
        ssh.close()
def changed_cache(self, md5):
    """Return True when the ssh cache entry for *md5* is absent or corrupted."""
    cache = {
        'scheme': 'ssh',
        'host': self.host,
        'port': self.port,
        'user': self.user,
        'path': posixpath.join(self.prefix, md5[0:2], md5[2:]),
    }

    if self.save_info(cache) == {self.PARAM_MD5: md5}:
        return False

    # Entry exists but its checksum no longer matches: corrupted, drop it.
    if self.exists([cache])[0]:
        logger.warn('Corrupted cache file {}'.format(self.to_string(cache)))
        self.remove(cache)

    return True
def download(self, from_infos, to_infos, no_progress_bar=False, names=None):
    """Download http(s) URLs to local paths.

    Fix: ``_verify_path_args`` is now called with ``from_infos`` first,
    matching every other ``download()`` implementation in this codebase
    (the two arguments were previously swapped here).
    """
    names = self._verify_path_args(from_infos, to_infos, names)

    for to_info, from_info, name in zip(to_infos, from_infos, names):
        if from_info['scheme'] not in ['http', 'https']:
            raise NotImplementedError

        if to_info['scheme'] != 'local':
            raise NotImplementedError

        msg = "Downloading '{}' to '{}'".format(from_info['url'],
                                                to_info['path'])
        logger.debug(msg)

        # Download to a temp file so a failed transfer never leaves a
        # partial file at the destination.
        tmp_file = self.tmp_file(to_info['path'])
        if not name:
            name = os.path.basename(to_info['path'])

        self._makedirs(to_info['path'])

        # Content-Length may be missing; in that case skip the bar.
        total = self._content_length(from_info['url'])

        if no_progress_bar or not total:
            cb = None
        else:
            cb = ProgressBarCallback(name, total)

        try:
            self._download_to(from_info['url'], tmp_file, callback=cb)
        except Exception as exc:
            msg = "Failed to download '{}'".format(from_info['url'])
            logger.warn(msg, exc)
            continue

        os.rename(tmp_file, to_info['path'])

        if not no_progress_bar:
            progress.finish_target(name)
def download(self, from_infos, to_infos, no_progress_bar=False, names=None):
    """Download Azure blobs to local paths."""
    names = self._verify_path_args(from_infos, to_infos, names)

    for dest, src, title in zip(to_infos, from_infos, names):
        if src['scheme'] != self.scheme:
            raise NotImplementedError

        if dest['scheme'] != 'local':
            raise NotImplementedError

        container, blob = src['bucket'], src['key']
        logger.debug("Downloading '{}/{}' to '{}'".format(
            container, blob, dest['path']))

        # Download to a temp file so a failed transfer never leaves a
        # partial file at the destination.
        tmp_file = self.tmp_file(dest['path'])
        title = title or os.path.basename(dest['path'])
        cb = None if no_progress_bar else Callback(title)

        self._makedirs(dest['path'])

        try:
            self.blob_service.get_blob_to_path(
                container, blob, tmp_file, progress_callback=cb)
        except Exception as exc:
            logger.warn(
                "Failed to download '{}/{}'".format(container, blob), exc)
        else:
            os.rename(tmp_file, dest['path'])

            if not no_progress_bar:
                progress.finish_target(title)
def upload(self, from_infos, to_infos, names=None):
    """Upload local files to the ssh remote over a shared SFTP session.

    Fix: the caught exception is now passed to ``logger.warn()`` as its
    second argument. Previously it was passed as a fourth argument to
    ``str.format()`` (whose template has only three placeholders), so the
    exception was silently dropped — every sibling transfer method logs
    it as ``logger.warn(msg, exc)``.
    """
    names = self._verify_path_args(to_infos, from_infos, names)

    # One connection/session for the whole batch; all to_infos share
    # host/user/port.
    ssh = self.ssh(host=to_infos[0]['host'],
                   user=to_infos[0]['user'],
                   port=to_infos[0]['port'])
    sftp = ssh.open_sftp()

    for from_info, to_info, name in zip(from_infos, to_infos, names):
        if to_info['scheme'] != 'ssh':
            raise NotImplementedError

        if from_info['scheme'] != 'local':
            raise NotImplementedError

        logger.debug("Uploading '{}' to '{}/{}'".format(
            from_info['path'], to_info['host'], to_info['path']))

        if not name:
            name = os.path.basename(from_info['path'])

        # Make sure the destination directory exists on the remote side.
        dname = posixpath.dirname(to_info['path'])
        self._exec(ssh, 'mkdir -p {}'.format(dname))

        try:
            sftp.put(from_info['path'],
                     to_info['path'],
                     callback=create_cb(name))
        except Exception as exc:
            msg = "Failed to upload '{}' to '{}/{}'"
            logger.warn(
                msg.format(from_info['path'],
                           to_info['host'],
                           to_info['path']), exc)
            continue

        progress.finish_target(name)

    sftp.close()
    ssh.close()
def upload(self, from_infos, to_infos, names=None):
    """Copy local files into the local remote.

    Fix: the failure message used to read "Failed to upload '{}' tp '{}'";
    the typo "tp" is corrected to "to".
    """
    names = self._verify_path_args(to_infos, from_infos, names)

    for from_info, to_info, name in zip(from_infos, to_infos, names):
        if to_info['scheme'] != 'local':
            raise NotImplementedError

        if from_info['scheme'] != 'local':
            raise NotImplementedError

        logger.debug("Uploading '{}' to '{}'".format(
            from_info['path'], to_info['path']))

        if not name:
            name = os.path.basename(from_info['path'])

        self._makedirs(to_info['path'])

        try:
            copyfile(from_info['path'], to_info['path'], name=name)
        except Exception as exc:
            msg = "Failed to upload '{}' to '{}'"
            logger.warn(msg.format(from_info['path'], to_info['path']), exc)
def checkout(self, path_info, checksum_info, force=False):
    """Link the working-tree path described by *path_info* to its local
    cache entry identified by the md5 in *checksum_info*.

    Handles both single-file caches and directory caches (a stored list of
    per-file entries). With ``force=True``, existing uncached files at the
    destination are removed without the safety check in ``_safe_remove``.
    Returns None in all cases.
    """
    path = path_info['path']
    md5 = checksum_info.get(self.PARAM_MD5)
    cache = self.get(md5)

    if not cache:
        msg = 'No cache info for \'{}\'. Skipping checkout.'
        logger.warn(msg.format(os.path.relpath(path)))
        return

    # Nothing to do when the workspace copy already matches the checksum.
    if not self.changed(path_info, checksum_info):
        msg = "Data '{}' didn't change."
        logger.info(msg.format(os.path.relpath(path)))
        return

    # Missing/corrupted cache: remove the stale workspace copy and bail.
    if self.changed_cache(md5):
        msg = u'Cache \'{}\' not found. File \'{}\' won\'t be created.'
        logger.warn(msg.format(md5, os.path.relpath(path)))
        remove(path)
        return

    msg = u'Checking out \'{}\' with cache \'{}\'.'
    logger.info(msg.format(os.path.relpath(path), md5))

    # --- single-file cache: replace the path with a link to the cache ---
    if not self.is_dir_cache(cache):
        if os.path.exists(path):
            # Only remove outright if forced or the content is already
            # saved in cache; otherwise go through the guarded removal.
            if force or self._already_cached(path):
                remove(path)
            else:
                self._safe_remove(path)

        self.link(cache, path)
        self.state.update_link(path)
        return

    # --- directory cache: link each entry listed in the dir manifest ---
    # Create dir separately so that dir is created
    # even if there are no files in it
    if not os.path.exists(path):
        os.makedirs(path)

    dir_info = self.load_dir_cache(md5)
    dir_relpath = os.path.relpath(path)
    dir_size = len(dir_info)
    # Only show a progress bar for large directories.
    bar = dir_size > LARGE_DIR_SIZE

    logger.info("Linking directory '{}'.".format(dir_relpath))

    for processed, entry in enumerate(dir_info):
        relpath = entry[self.PARAM_RELPATH]
        m = entry[self.PARAM_MD5]
        p = os.path.join(path, relpath)
        c = self.get(m)

        entry_info = {'scheme': path_info['scheme'], self.PARAM_PATH: p}

        entry_checksum_info = {self.PARAM_MD5: m}

        # Re-link only entries whose workspace copy is missing or stale.
        if self.changed(entry_info, entry_checksum_info):
            if os.path.exists(p):
                if force or self._already_cached(p):
                    remove(p)
                else:
                    self._safe_remove(p)

            self.link(c, p)

        if bar:
            progress.update_target(dir_relpath, processed, dir_size)

    # Drop workspace files that are not part of the cached directory.
    self._discard_working_directory_changes(path, dir_info, force=force)

    self.state.update_link(path)

    if bar:
        progress.finish_target(dir_relpath)