def get_data_http(self, filename):
    """Yield the contents of `filename`, fetched over HTTP, in chunks.

    The URL comes from `self.get_url(filename)` and is read
    `config.CRDS_DATA_CHUNK_SIZE` bytes at a time.  A verbose progress
    message is logged before each non-empty chunk is yielded.

    Raises:
        CrdsDownloadError: if any `Exception` occurs while opening the URL,
            reading from it, or producing the progress report.  The original
            exception is chained as the cause.
    """
    url = self.get_url(filename)
    # Explicit sentinel: lets the cleanup below know whether the open
    # succeeded without relying on catching UnboundLocalError.
    infile = None
    try:
        infile = request.urlopen(url)
        file_size = utils.human_format_number(
            self.catalog_file_size(filename)).strip()
        stats = utils.TimingStats()
        data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
        while data:
            stats.increment("bytes", len(data))
            status = stats.status("bytes")
            # Drop the last whitespace-separated token of the first status
            # field (presumably a unit label -- confirm against TimingStats).
            bytes_so_far = " ".join(status[0].split()[:-1])
            log.verbose("Transferred HTTP", repr(url), bytes_so_far, "/",
                        file_size, "bytes at", status[1], verbosity=20)
            yield data
            data = infile.read(config.CRDS_DATA_CHUNK_SIZE)
    except Exception as exc:
        raise CrdsDownloadError(
            "Failed downloading", srepr(filename),
            "from url", srepr(url), ":", str(exc)) from exc
    finally:
        # Only close what was actually opened; the open itself may have failed.
        if infile is not None:
            infile.close()
def download_files(self, downloads, localpaths):
    """Download each file named in `downloads`, one after another.

    `self.info_map` is rebuilt to map every requested name to its entry in
    the server download metadata, or to a "NOT FOUND" marker string when the
    metadata has no entry for it.  `localpaths` maps each name to the local
    path it is downloaded to.

    Returns the sum of the on-disk sizes of the files that downloaded
    successfully, or 0 when `config.writable_cache_or_verbose()` reports
    that downloading should be skipped.

    A failure on an individual file is re-raised when `self.raise_exceptions`
    is true; otherwise it is logged as an error and the loop continues.
    """
    server_metadata = get_download_metadata()
    self.info_map = {}
    for filename in downloads:
        self.info_map[filename] = server_metadata.get(
            filename, "NOT FOUND unknown to server")

    if not config.writable_cache_or_verbose(
            "Readonly cache, skipping download of (first 5):",
            repr(downloads[:5]), verbosity=70):
        return 0

    fetched_bytes = 0
    file_count = len(downloads)
    expected_bytes = get_total_bytes(self.info_map)
    for index, name in enumerate(downloads):
        try:
            if "NOT FOUND" in self.info_map[name]:
                raise CrdsDownloadError("file is not known to CRDS server.")
            size = self.catalog_file_size(name)
            path = localpaths[name]
            log.info(
                file_progress("Fetching", name, path, size, fetched_bytes,
                              expected_bytes, index, file_count))
            self.download(name, path)
            # Count what actually landed on disk rather than the catalog size.
            fetched_bytes += os.stat(path).st_size
        except Exception as exc:
            if self.raise_exceptions:
                raise
            log.error("Failure downloading file", repr(name), ":", str(exc))
    return fetched_bytes
def verify_file(self, filename, localpath):
    """Validate the downloaded copy of `filename` at `localpath`.

    The expected values are taken from `self.info_map[filename]`.  The size
    comparison is enforced only when `config.get_length_flag()` is true.  The
    sha1sum comparison is skipped when `config.get_checksum_flag()` is false
    or when the recorded sha1sum is "" or "none".

    Raises:
        CrdsDownloadError: on a size mismatch or a sha1sum mismatch.
    """
    server_info = self.info_map[filename]
    actual_size = os.stat(localpath).st_size
    expected_size = int(server_info["size"])
    if expected_size != actual_size and config.get_length_flag():
        raise CrdsDownloadError(
            "downloaded file size", actual_size,
            "does not match server size", expected_size)

    if not config.get_checksum_flag():
        log.verbose("Skipping sha1sum with CRDS_DOWNLOAD_CHECKSUMS=False")
        return

    expected_sha1sum = server_info["sha1sum"]
    if expected_sha1sum in ["", "none"]:
        log.verbose("Skipping sha1sum check since server doesn't know it.")
        return

    actual_sha1sum = utils.checksum(localpath)
    if expected_sha1sum != actual_sha1sum:
        raise CrdsDownloadError(
            "downloaded file", srepr(filename),
            "sha1sum", srepr(actual_sha1sum),
            "does not match server sha1sum", srepr(expected_sha1sum))
def plugin_download(self, filename, localpath):
    """Download `filename` to `localpath` by running the configured plugin.

    The command template comes from `config.get_download_plugin()`.  Its
    "${SOURCE_URL}" and "${OUTPUT_PATH}" placeholders are replaced with the
    file's URL and `localpath`, and the result is run with `os.system()`.

    Raises:
        KeyboardInterrupt: when the command's status is 2.
        CrdsDownloadError: for any other non-zero status.
    """
    source_url = self.get_url(filename)
    command = config.get_download_plugin()
    # NOTE(review): the values are substituted into a shell command without
    # quoting; confirm URLs and paths cannot contain shell metacharacters.
    substitutions = (
        ("${SOURCE_URL}", source_url),
        ("${OUTPUT_PATH}", localpath),
    )
    for placeholder, value in substitutions:
        command = command.replace(placeholder, value)
    log.verbose("Running download plugin:", repr(command))

    exit_status = os.system(command)
    if exit_status == 0:
        return
    if exit_status == 2:
        # Presumably the plugin was stopped by an interrupt (control-c);
        # surface it as a KeyboardInterrupt rather than a download error.
        raise KeyboardInterrupt("Interrupted plugin.")
    raise CrdsDownloadError(
        "Plugin download fail status =", repr(exit_status),
        "with command:", srepr(command))
def download(self, name, localpath):
    """Download the single file `name` to `localpath`.

    The transfer is run as `self.download_core(name, localpath)` through
    `proxy.apply_with_retries()`, and that call's result is returned.

    Whatever goes wrong, `localpath` is removed via `self.remove_file()` so a
    failed download is not left behind.  Ordinary exceptions are re-raised as
    CrdsDownloadError with the cause chained.  Anything else (mainly
    KeyboardInterrupt from control-c) propagates unchanged, so code that
    deliberately does not trap KeyboardInterrupt still sees it.
    """
    assert not config.get_cache_readonly(), "Readonly cache, cannot download files " + repr(name)
    try:
        utils.ensure_dir_exists(localpath)
        return proxy.apply_with_retries(self.download_core, name, localpath)
    except BaseException as exc:
        # Blow away the failed download for every kind of failure.
        self.remove_file(localpath)
        if not isinstance(exc, Exception):
            # Mainly control-c: clean up, then re-raise it as-is.
            raise
        raise CrdsDownloadError(
            "Error fetching data for", srepr(name),
            "at CRDS server", srepr(get_crds_server()),
            "with mode", srepr(config.get_download_mode()),
            ":", str(exc)) from exc