def report_disk_usage(self, store):
    """
    Print the final index size and the number of bytes written during the index phase.

    Both values are looked up via ``store.get_one``. If either lookup yields ``None``,
    a single fallback line is printed instead.

    :param store: Object providing ``get_one(key)``.
    """
    final_size = store.get_one("final_index_size_bytes")
    written_key = "disk_io_write_bytes_%s" % track.BenchmarkPhase.index.name
    written = store.get_one(written_key)

    if final_size is None or written is None:
        print(" Could not determine disk usage metrics")
        return

    # Both lines share the same GB/MB layout, so render them from one table.
    report_lines = (
        (" Final index size: %.1fGB (%.1fMB)", final_size),
        (" Totally written: %.1fGB (%.1fMB)", written),
    )
    for template, amount in report_lines:
        print(template % (convert.bytes_to_gb(amount), convert.bytes_to_mb(amount)))
def download(cfg, url, local_path, size_in_bytes):
    """
    Make sure ``local_path`` exists, downloading it from ``url`` unless offline mode is on.

    The offline flag is read from ``cfg.opts("system", "offline.mode")``. A
    ``urllib.error.URLError`` raised while downloading is logged and not re-raised; the
    existence check at the end reports the failure instead.

    :param cfg: Object providing ``opts(section, key)``.
    :param url: Location to download from.
    :param local_path: Path the data should be stored at.
    :param size_in_bytes: Size that is announced to the user and passed on to ``net.download``.
    :raises exceptions.SystemSetupError: If ``local_path`` does not exist at the end.
    """
    offline = cfg.opts("system", "offline.mode")

    if os.path.isfile(local_path):
        logger.info("[%s] already exists locally. Skipping download." % local_path)
        return

    if not offline:
        logger.info("Downloading from [%s] to [%s]." % (url, local_path))
        try:
            target_dir = os.path.dirname(local_path)
            io.ensure_dir(target_dir)
            megabytes = round(convert.bytes_to_mb(size_in_bytes))
            announcement = "Downloading data from %s (%s MB) ... " % (url, megabytes)
            # no trailing newline and an explicit flush so the message shows up before the download starts
            print(announcement, end='', flush=True)
            net.download(url, local_path, size_in_bytes)
            print("Done")
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # The file has to be present by now; otherwise explain why it is not.
    if os.path.isfile(local_path):
        return
    if offline:
        raise exceptions.SystemSetupError(
            "Cannot find %s. Please disable offline mode and retry again." % local_path)
    raise exceptions.SystemSetupError(
        "Could not download from %s to %s. Please verify that data are available at %s and "
        "check your internet connection." % (url, local_path, url))
def download(cfg, url, local_path, size_in_bytes):
    """
    Make sure ``local_path`` exists with the expected size, downloading it from ``url`` if necessary.

    The offline flag is read from ``cfg.opts("system", "offline.mode")``. A
    ``urllib.error.URLError`` raised while downloading is logged and not re-raised; the
    verification at the end reports the failure instead.

    NOTE(review): ``track`` is not a parameter; it is presumably provided by an enclosing scope.

    :param cfg: Object providing ``opts(section, key)``.
    :param url: Location to download from.
    :param local_path: Path the data should be stored at.
    :param size_in_bytes: Expected file size in bytes, or ``None`` to skip all size comparisons.
    :return: ``False`` if the download was skipped because a suitable file already exists,
             ``True`` if the function got past that check and the file passed verification.
    :raises exceptions.SystemSetupError: If ``local_path`` does not exist at the end.
    :raises exceptions.DataError: If the file size differs from ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")

    if os.path.isfile(local_path):
        # an existing file only counts if its size also matches our expectation
        size_matches = size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes
        if size_matches:
            logger.info("[%s] already exists locally. Skipping download." % local_path)
            return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                megabytes = round(convert.bytes_to_mb(size_in_bytes))
                start_message = "Downloading data from [%s] (%s MB) to [%s]." % (url, megabytes, local_path)
            else:
                start_message = "Downloading data from [%s] to [%s]." % (url, local_path)
            logger.info(start_message)

            # these files are typically very large, hence the more accurate progress output
            progress_title = "[INFO] Downloading data for track %s" % track.name
            progress = net.Progress(progress_title, accuracy=1)
            net.download(url, local_path, size_in_bytes, progress_indicator=progress)
            progress.finish()
            logger.info("Downloaded data from [%s] to [%s]." % (url, local_path))
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # The file has to be present by now; otherwise explain why it is not.
    if not os.path.isfile(local_path):
        if offline:
            raise exceptions.SystemSetupError(
                "Cannot find %s. Please disable offline mode and retry again." % local_path)
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (url, local_path, url))

    actual_size = os.path.getsize(local_path)
    if size_in_bytes is None or actual_size == size_in_bytes:
        return True
    raise exceptions.DataError(
        "[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." % (local_path, actual_size, size_in_bytes))
def download(cfg, url, local_path, size_in_bytes):
    """
    Make sure ``local_path`` exists with the expected size, downloading it from ``url`` if necessary.

    The offline flag is read from ``cfg.opts("system", "offline.mode")``. Progress is announced
    via ``console.info`` / ``console.println``. A ``urllib.error.URLError`` raised while
    downloading is logged and not re-raised; the verification at the end reports the failure.

    :param cfg: Object providing ``opts(section, key)``.
    :param url: Location to download from.
    :param local_path: Path the data should be stored at.
    :param size_in_bytes: Expected file size in bytes, or ``None`` to skip all size comparisons.
    :return: ``False`` if the download was skipped because a suitable file already exists,
             ``True`` if the function got past that check and the file passed verification.
    :raises exceptions.SystemSetupError: If ``local_path`` does not exist at the end.
    :raises exceptions.DataError: If the file size differs from ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")

    if os.path.isfile(local_path):
        # an existing file only counts if its size also matches our expectation
        size_matches = size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes
        if size_matches:
            logger.info("[%s] already exists locally. Skipping download." % local_path)
            return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                megabytes = round(convert.bytes_to_mb(size_in_bytes))
                announcement = "Downloading data from [%s] (%s MB) to [%s] ... " % (url, megabytes, local_path)
            else:
                announcement = "Downloading data from [%s] to [%s] ... " % (url, local_path)
            # no trailing newline and an explicit flush so the message shows up before the download starts
            console.info(announcement, end='', flush=True, logger=logger)
            net.download(url, local_path, size_in_bytes)
            console.println("[OK]")
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # The file has to be present by now; otherwise explain why it is not.
    if not os.path.isfile(local_path):
        if offline:
            raise exceptions.SystemSetupError(
                "Cannot find %s. Please disable offline mode and retry again." % local_path)
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (url, local_path, url))

    actual_size = os.path.getsize(local_path)
    if size_in_bytes is None or actual_size == size_in_bytes:
        return True
    raise exceptions.DataError(
        "[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." % (local_path, actual_size, size_in_bytes))
def download_corpus(root_url, target_path, size_in_bytes, track_name, offline, test_mode):
    """
    Download a corpus file from ``root_url`` to ``target_path`` and verify its size.

    The download URL is ``root_url`` joined with the base name of ``target_path``.

    :param root_url: Root URL the file is located under. Must be non-empty.
    :param target_path: Local path the file is stored at; its base name is also the remote file name.
    :param size_in_bytes: Expected file size in bytes, or ``None`` to skip the size comparison.
    :param track_name: Track name; used for the progress title and in error messages.
    :param offline: If truthy, no download is attempted and an error is raised instead.
    :param test_mode: If truthy, an HTTP 404 is reported as "track does not support test mode".
    :raises exceptions.DataError: If ``root_url`` is empty, the download fails with an HTTP or URL
                                  error, or the downloaded file's size differs from ``size_in_bytes``.
    :raises exceptions.SystemSetupError: If ``offline`` is set or the file is missing after the download.
    """
    file_name = os.path.basename(target_path)

    if not root_url:
        raise exceptions.DataError(
            "%s is missing and it cannot be downloaded because no source URL is provided in the track." % target_path)
    if offline:
        raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % target_path)

    # Build the URL from the ``root_url`` parameter that was validated above. The previous code
    # referenced ``source_root_url``, a name that is not defined anywhere in this function.
    data_url = "%s/%s" % (root_url, file_name)
    try:
        io.ensure_dir(os.path.dirname(target_path))
        if size_in_bytes:
            size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
            logger.info("Downloading data from [%s] (%s MB) to [%s]." % (data_url, size_in_mb, target_path))
        else:
            logger.info("Downloading data from [%s] to [%s]." % (data_url, target_path))

        # we want to have a bit more accurate download progress as these files are typically very large
        progress = net.Progress("[INFO] Downloading data for track %s" % track_name, accuracy=1)
        net.download(data_url, target_path, size_in_bytes, progress_indicator=progress)
        progress.finish()
        logger.info("Downloaded data from [%s] to [%s]." % (data_url, target_path))
    except urllib.error.HTTPError as e:
        # HTTPError has to be handled before URLError because it is a subclass of it.
        if e.code == 404 and test_mode:
            raise exceptions.DataError("Track [%s] does not support test mode. Please ask the track author to add it or "
                                       "disable test mode and retry." % track_name)
        else:
            msg = "Could not download [%s] to [%s]" % (data_url, target_path)
            if e.reason:
                msg += " (HTTP status: %s, reason: %s)" % (str(e.code), e.reason)
            else:
                msg += " (HTTP status: %s)" % str(e.code)
            raise exceptions.DataError(msg)
    except urllib.error.URLError:
        logger.exception("Could not download [%s] to [%s]." % (data_url, target_path))
        raise exceptions.DataError("Could not download [%s] to [%s]." % (data_url, target_path))

    # The download reported no error, so the file has to be present now.
    if not os.path.isfile(target_path):
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (data_url, target_path, data_url))

    actual_size = os.path.getsize(target_path)
    if size_in_bytes is not None and actual_size != size_in_bytes:
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (target_path, actual_size, size_in_bytes))
def report_segment_memory(self, stats):
    """
    Build the report rows for segment-related heap usage.

    :param stats: Object providing ``has_memory_stats()`` and the ``memory_*`` attributes read below.
    :return: A list of ``[label, value]`` rows, where each value is the attribute passed through
             ``convert.bytes_to_mb``. An empty list if ``stats.has_memory_stats()`` is falsy.
    """
    if not stats.has_memory_stats():
        return []

    # (row label, name of the attribute on ``stats`` that holds the raw value)
    rows = (
        ("Heap used for segments [MB]", "memory_segments"),
        ("Heap used for doc values [MB]", "memory_doc_values"),
        ("Heap used for terms [MB]", "memory_terms"),
        ("Heap used for norms [MB]", "memory_norms"),
        ("Heap used for points [MB]", "memory_points"),
        ("Heap used for stored fields [MB]", "memory_stored_fields"),
    )
    return [[label, convert.bytes_to_mb(getattr(stats, attribute))] for label, attribute in rows]
def download(cfg, url, local_path, size_in_bytes):
    """
    Fetch ``url`` to ``local_path`` unless a file of the expected size is already there.

    Offline mode is read from ``cfg.opts("system", "offline.mode")`` and suppresses the download
    attempt. A ``urllib.error.URLError`` during the download is only logged; whether the
    download worked is decided by the checks at the end of the function.

    NOTE(review): ``track`` is not a parameter; it is presumably provided by an enclosing scope.

    :param cfg: Object providing ``opts(section, key)``.
    :param url: Location to download from.
    :param local_path: Path the data should be stored at.
    :param size_in_bytes: Expected file size in bytes, or ``None`` to skip all size comparisons.
    :return: ``False`` if nothing was downloaded because a suitable file already exists,
             ``True`` if the function got past that check and the file passed verification.
    :raises exceptions.SystemSetupError: If ``local_path`` does not exist at the end.
    :raises exceptions.DataError: If the file size differs from ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")
    already_present = os.path.isfile(local_path)

    # skipping is only safe if the size of the existing file also matches our expectation
    if already_present and (size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes):
        logger.info("[%s] already exists locally. Skipping download." % local_path)
        return False

    if not offline:
        try:
            parent_dir = os.path.dirname(local_path)
            io.ensure_dir(parent_dir)
            if size_in_bytes:
                rounded_mb = round(convert.bytes_to_mb(size_in_bytes))
                logger.info("Downloading data from [%s] (%s MB) to [%s]." % (url, rounded_mb, local_path))
            else:
                logger.info("Downloading data from [%s] to [%s]." % (url, local_path))
            # these files are typically very large, hence the more accurate progress output
            indicator = net.Progress("[INFO] Downloading data for track %s" % track.name, accuracy=1)
            net.download(url, local_path, size_in_bytes, progress_indicator=indicator)
            indicator.finish()
            logger.info("Downloaded data from [%s] to [%s]." % (url, local_path))
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # Verification: the file has to exist ...
    if not os.path.isfile(local_path):
        if offline:
            failure = "Cannot find %s. Please disable offline mode and retry again." % local_path
        else:
            failure = ("Cannot download from %s to %s. Please verify that data are available at %s and "
                       "check your internet connection." % (url, local_path, url))
        raise exceptions.SystemSetupError(failure)

    # ... and, if an expected size is known, it has to match.
    actual_size = os.path.getsize(local_path)
    size_is_wrong = size_in_bytes is not None and actual_size != size_in_bytes
    if size_is_wrong:
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (local_path, actual_size, size_in_bytes))
    return True
def download(cfg, url, local_path, size_in_bytes):
    """
    Fetch ``url`` to ``local_path`` unless a file of the expected size is already there.

    Offline mode is read from ``cfg.opts("system", "offline.mode")`` and suppresses the download
    attempt. Progress is announced via ``console.info`` / ``console.println``. A
    ``urllib.error.URLError`` during the download is only logged; whether the download worked
    is decided by the checks at the end of the function.

    :param cfg: Object providing ``opts(section, key)``.
    :param url: Location to download from.
    :param local_path: Path the data should be stored at.
    :param size_in_bytes: Expected file size in bytes, or ``None`` to skip all size comparisons.
    :return: ``False`` if nothing was downloaded because a suitable file already exists,
             ``True`` if the function got past that check and the file passed verification.
    :raises exceptions.SystemSetupError: If ``local_path`` does not exist at the end.
    :raises exceptions.DataError: If the file size differs from ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")
    already_present = os.path.isfile(local_path)

    # skipping is only safe if the size of the existing file also matches our expectation
    if already_present and (size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes):
        logger.info("[%s] already exists locally. Skipping download." % local_path)
        return False

    if not offline:
        try:
            parent_dir = os.path.dirname(local_path)
            io.ensure_dir(parent_dir)
            # no trailing newline and an explicit flush so the message shows up before the download starts
            if size_in_bytes:
                rounded_mb = round(convert.bytes_to_mb(size_in_bytes))
                console.info("Downloading data from [%s] (%s MB) to [%s] ... " % (url, rounded_mb, local_path),
                             end='', flush=True, logger=logger)
            else:
                console.info("Downloading data from [%s] to [%s] ... " % (url, local_path),
                             end='', flush=True, logger=logger)
            net.download(url, local_path, size_in_bytes)
            console.println("[OK]")
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # Verification: the file has to exist ...
    if not os.path.isfile(local_path):
        if offline:
            failure = "Cannot find %s. Please disable offline mode and retry again." % local_path
        else:
            failure = ("Cannot download from %s to %s. Please verify that data are available at %s and "
                       "check your internet connection." % (url, local_path, url))
        raise exceptions.SystemSetupError(failure)

    # ... and, if an expected size is known, it has to match.
    actual_size = os.path.getsize(local_path)
    size_is_wrong = size_in_bytes is not None and actual_size != size_in_bytes
    if size_is_wrong:
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (local_path, actual_size, size_in_bytes))
    return True
def _download(self, url, local_path, size_in_bytes=None, force_download=False, raise_url_error=False):
    """
    Make sure ``local_path`` exists, downloading it from ``url`` via http(s) or s3 if necessary.

    The offline flag is read from ``self._config.opts("system", "offline.mode")``.

    :param url: Location to download from. Has to start with ``http`` or ``s3``.
    :param local_path: Path the data should be stored at.
    :param size_in_bytes: Optional size. If truthy, progress messages are printed; it is also
                          passed on to the s3 download.
    :param force_download: If truthy, download even if ``local_path`` already exists.
    :param raise_url_error: If truthy, a ``urllib.error.URLError`` is re-raised after it has been
                            logged. Otherwise it is only logged.
    :raises exceptions.SystemSetupError: If the URL scheme is not supported or ``local_path``
                                         does not exist at the end.
    """
    offline = self._config.opts("system", "offline.mode")
    skip_download = os.path.isfile(local_path) and not force_download
    if skip_download:
        logger.info("[%s] already exists locally. Skipping download." % local_path)
        return

    if not offline:
        logger.info("Downloading from [%s] to [%s]." % (url, local_path))
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                megabytes = round(convert.bytes_to_mb(size_in_bytes))
                # no trailing newline and an explicit flush so the message shows up before the download starts
                print("Downloading data from %s (%s MB) ... " % (url, megabytes), end='', flush=True)

            if url.startswith("http"):
                net.download(url, local_path)
            elif url.startswith("s3"):
                self._do_download_via_s3(url, local_path, size_in_bytes)
            else:
                unsupported = "Cannot download benchmark data from [%s]. Only http(s) and s3 are supported." % url
                raise exceptions.SystemSetupError(unsupported)

            if size_in_bytes:
                print("Done")
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))
            if raise_url_error:
                raise

    # The file has to be present by now; otherwise explain why it is not.
    if os.path.isfile(local_path):
        return
    if offline:
        raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
    raise exceptions.SystemSetupError("Could not download from %s to %s. Please verify that data are available at %s and "
                                      "check your internet connection." % (url, local_path, url))
def _mb(self, store, key): value = store.get_one(key) if value: return "%.2fMB" % convert.bytes_to_mb(value) else: return "N/A"