def fetch(self):
    """
    Makes the Elasticsearch distribution archive available locally.

    The archive is placed in ``self.distributions_root`` under the file name reported by ``self.repo``. It is
    downloaded from ``self.repo.download_url`` unless a file already exists at that location and
    ``self.repo.cache`` is truthy. Afterwards ``self.distribution_path`` is set to the archive's path.

    :raises exceptions.SystemSetupError: if the download fails with an HTTP error.
    """
    io.ensure_dir(self.distributions_root)
    url = self.repo.download_url
    archive_path = os.path.join(self.distributions_root, self.repo.file_name)
    self.logger.info("Resolved download URL [%s] for version [%s]", url, self.version)

    # an existing file is only reused if the repository allows caching
    reuse_local_copy = os.path.isfile(archive_path) and self.repo.cache
    if reuse_local_copy:
        self.logger.info(
            "Skipping download for version [%s]. Found an existing binary at [%s].",
            self.version,
            archive_path,
        )
    else:
        try:
            self.logger.info("Starting download of Elasticsearch [%s]", self.version)
            indicator = net.Progress("[INFO] Downloading Elasticsearch %s" % self.version)
            net.download(url, archive_path, progress_indicator=indicator)
            indicator.finish()
            self.logger.info("Successfully downloaded Elasticsearch [%s].", self.version)
        except urllib.error.HTTPError:
            self.logger.exception(
                "Cannot download Elasticsearch distribution for version [%s] from [%s].",
                self.version,
                url,
            )
            raise exceptions.SystemSetupError(
                "Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                "version [%s] is correct." % (url, self.version)
            )

    self.distribution_path = archive_path
def download_benchmark_candidate(ctx, track):
    """
    Downloads the Elasticsearch zip distribution for the configured version unless it is already present.

    The version is read from the config key ``source``/``distribution.version``. The archive is stored as
    ``elasticsearch-<version>.zip`` in the directory formed from ``system``/``root.dir`` and
    ``source``/``distribution.dir``. The resulting path is registered in the config under
    ``builder``/``candidate.bin.path`` (invocation scope).

    :param ctx: Context object whose ``config`` attribute is read and updated.
    :param track: Not used by this function.
    :raises exceptions.SystemSetupError: if the version is blank or the download fails with an HTTP error.
    """
    version = ctx.config.opts("source", "distribution.version")
    if version.strip() == "":
        raise exceptions.SystemSetupError("Could not determine version. Please specify the Elasticsearch distribution "
                                          "to download with the command line parameter --distribution-version. "
                                          "E.g. --distribution-version=5.0.0")

    distributions_root = "%s/%s" % (ctx.config.opts("system", "root.dir"), ctx.config.opts("source", "distribution.dir"))
    io.ensure_dir(distributions_root)
    distribution_path = "%s/elasticsearch-%s.zip" % (distributions_root, version)
    download_url = "https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/zip/elasticsearch/%s/" \
                   "elasticsearch-%s.zip" % (version, version)

    if not os.path.isfile(distribution_path):
        try:
            print("Downloading Elasticsearch %s ..." % version)
            net.download(download_url, distribution_path)
        except urllib.error.HTTPError:
            # log via the module logger (as the skip branch below does) instead of the root logger
            logger.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, download_url))
            raise exceptions.SystemSetupError("Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                                              "version [%s] is correct." % (download_url, version))
    else:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, distribution_path))

    ctx.config.add(config.Scope.invocation, "builder", "candidate.bin.path", distribution_path)
def download_benchmark_candidate(ctx):
    """
    Downloads the Elasticsearch tar.gz distribution for the configured version from the configured repository.

    Version and repository name are read from the ``source`` config section. The archive is stored as
    ``elasticsearch-<version>.tar.gz`` below the configured distributions directory and its path is registered
    under ``builder``/``candidate.bin.path`` (invocation scope). An existing file is reused unless the
    repository's ``must_download`` is truthy.

    :param ctx: Context object whose ``config`` attribute is read and updated.
    :raises exceptions.SystemSetupError: if the version is blank, the repository name is unknown or the download
                                         fails with an HTTP error.
    """
    cfg = ctx.config
    version = cfg.opts("source", "distribution.version")
    repo_name = cfg.opts("source", "distribution.repository")
    if version.strip() == "":
        raise exceptions.SystemSetupError(
            "Could not determine version. Please specify the Elasticsearch distribution "
            "to download with the command line parameter --distribution-version. "
            "E.g. --distribution-version=5.0.0"
        )

    root_dir = cfg.opts("system", "root.dir")
    distribution_dir = cfg.opts("source", "distribution.dir")
    target_dir = "%s/%s" % (root_dir, distribution_dir)
    io.ensure_dir(target_dir)
    archive_path = "%s/elasticsearch-%s.tar.gz" % (target_dir, version)

    try:
        repo = distribution_repos[repo_name]
    except KeyError:
        valid_names = ",".join(distribution_repos.keys())
        raise exceptions.SystemSetupError("Unknown distribution repository [%s]. Valid values are: [%s]" % (repo_name, valid_names))

    url = repo.download_url(version)
    logger.info("Resolved download URL [%s] for version [%s]" % (url, version))

    # reuse a local archive only if the repository does not force a fresh download
    reuse_local_copy = os.path.isfile(archive_path) and not repo.must_download
    if reuse_local_copy:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, archive_path))
    else:
        logger.info("Downloading distribution for version [%s]." % version)
        try:
            print("Downloading Elasticsearch %s ..." % version)
            net.download(url, archive_path)
        except urllib.error.HTTPError:
            logging.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, url))
            raise exceptions.SystemSetupError(
                "Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                "version [%s] is correct." % (url, version)
            )

    cfg.add(config.Scope.invocation, "builder", "candidate.bin.path", archive_path)
def download(cfg, url, local_path, size_in_bytes):
    """
    Downloads ``url`` to ``local_path`` unless the file already exists.

    No download is attempted if the config option ``system``/``offline.mode`` is truthy. URL errors during the
    download are logged and not re-raised; instead the presence of the file is verified afterwards.

    :param cfg: Config object providing ``opts(section, key)``.
    :param url: Source URL.
    :param local_path: Target file path; its parent directory is created if needed.
    :param size_in_bytes: Size of the download; used for the console message and passed on to ``net.download``.
    :raises exceptions.SystemSetupError: if the file is not present after the download step.
    """
    offline = cfg.opts("system", "offline.mode")
    if os.path.isfile(local_path):
        logger.info("[%s] already exists locally. Skipping download." % local_path)
        return

    if not offline:
        logger.info("Downloading from [%s] to [%s]." % (url, local_path))
        try:
            io.ensure_dir(os.path.dirname(local_path))
            megabytes = round(convert.bytes_to_mb(size_in_bytes))
            # no newline and an explicit flush so the message shows up before the download starts
            print("Downloading data from %s (%s MB) ... " % (url, megabytes), end='', flush=True)
            net.download(url, local_path, size_in_bytes)
            print("Done")
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # whatever happened above, the file has to be there now
    if os.path.isfile(local_path):
        return
    if offline:
        raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
    raise exceptions.SystemSetupError(
        "Could not download from %s to %s. Please verify that data are available at %s and "
        "check your internet connection." % (url, local_path, url)
    )
def from_distribution(cfg):
    """
    Provides the Elasticsearch tar.gz distribution for the configured version.

    Version and repository name are read from the ``source`` config section. The archive is stored as
    ``elasticsearch-<version>.tar.gz`` below the configured distributions directory; an existing file is reused
    unless the repository's ``must_download`` is truthy. The archive path is registered in the config under
    ``builder``/``candidate.bin.path`` (invocation scope).

    :param cfg: Config object that is read and updated.
    :raises exceptions.SystemSetupError: if the version is blank, the repository name is unknown or the download
                                         fails with an HTTP error.
    """
    version = cfg.opts("source", "distribution.version")
    repo_name = cfg.opts("source", "distribution.repository")
    if version.strip() == "":
        raise exceptions.SystemSetupError(
            "Could not determine version. Please specify the Elasticsearch distribution "
            "to download with the command line parameter --distribution-version. "
            "E.g. --distribution-version=5.0.0"
        )

    root_dir = cfg.opts("system", "root.dir")
    distribution_dir = cfg.opts("source", "distribution.dir")
    target_dir = "%s/%s" % (root_dir, distribution_dir)
    io.ensure_dir(target_dir)
    archive_path = "%s/elasticsearch-%s.tar.gz" % (target_dir, version)

    try:
        repo = distribution_repos[repo_name]
    except KeyError:
        valid_names = ",".join(distribution_repos.keys())
        raise exceptions.SystemSetupError("Unknown distribution repository [%s]. Valid values are: [%s]" % (repo_name, valid_names))

    url = repo.download_url(version)
    logger.info("Resolved download URL [%s] for version [%s]" % (url, version))

    # reuse a local archive only if the repository does not force a fresh download
    reuse_local_copy = os.path.isfile(archive_path) and not repo.must_download
    if reuse_local_copy:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, archive_path))
    else:
        try:
            console.info("Downloading Elasticsearch %s ... " % version, end="", flush=True, logger=logger)
            net.download(url, archive_path)
            console.println("[OK]")
        except urllib.error.HTTPError:
            console.println("[FAILED]")
            logging.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, url))
            raise exceptions.SystemSetupError(
                "Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                "version [%s] is correct." % (url, version)
            )

    cfg.add(config.Scope.invocation, "builder", "candidate.bin.path", archive_path)
def download(cfg, url, local_path, size_in_bytes):
    """
    Downloads ``url`` to ``local_path`` unless a file of the expected size is already present.

    No download is attempted if the config option ``system``/``offline.mode`` is truthy. URL errors during the
    download are logged and not re-raised; the file's presence and size are verified afterwards instead.

    :param cfg: Config object providing ``opts(section, key)``.
    :param url: Source URL.
    :param local_path: Target file path; its parent directory is created if needed.
    :param size_in_bytes: Expected file size in bytes or ``None`` to skip all size checks.
    :return: ``False`` if the existing local file was accepted, ``True`` if execution got past the download step.
    :raises exceptions.SystemSetupError: if the file is not present after the download step.
    :raises exceptions.DataError: if the file's size differs from ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")
    if os.path.isfile(local_path):
        # an existing file is only good enough if its size is as expected (when a size is known)
        if size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes:
            logger.info("[%s] already exists locally. Skipping download." % local_path)
            return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                megabytes = round(convert.bytes_to_mb(size_in_bytes))
                start_message = "Downloading data from [%s] (%s MB) to [%s]." % (url, megabytes, local_path)
            else:
                start_message = "Downloading data from [%s] to [%s]." % (url, local_path)
            logger.info(start_message)

            # these files are typically very large, hence the finer-grained progress output
            # NOTE(review): `track` is not a parameter of this function; presumably provided by an enclosing scope.
            indicator = net.Progress("[INFO] Downloading data for track %s" % track.name, accuracy=1)
            net.download(url, local_path, size_in_bytes, progress_indicator=indicator)
            indicator.finish()
            logger.info("Downloaded data from [%s] to [%s]." % (url, local_path))
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # whatever happened above, the file has to be there now
    if not os.path.isfile(local_path):
        if offline:
            raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (url, local_path, url)
        )

    size_on_disk = os.path.getsize(local_path)
    size_mismatch = size_in_bytes is not None and size_on_disk != size_in_bytes
    if size_mismatch:
        raise exceptions.DataError(
            "[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." % (local_path, size_on_disk, size_in_bytes)
        )
    return True
def download(cfg, url, local_path, size_in_bytes):
    """
    Downloads ``url`` to ``local_path`` unless a file of the expected size is already present.

    No download is attempted if the config option ``system``/``offline.mode`` is truthy. URL errors during the
    download are logged and not re-raised; the file's presence and size are verified afterwards instead.

    :param cfg: Config object providing ``opts(section, key)``.
    :param url: Source URL.
    :param local_path: Target file path; its parent directory is created if needed.
    :param size_in_bytes: Expected file size in bytes or ``None`` to skip all size checks.
    :return: ``False`` if the existing local file was accepted, ``True`` if execution got past the download step.
    :raises exceptions.SystemSetupError: if the file is not present after the download step.
    :raises exceptions.DataError: if the file's size differs from ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")
    if os.path.isfile(local_path):
        # an existing file is only good enough if its size is as expected (when a size is known)
        if size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes:
            logger.info("[%s] already exists locally. Skipping download." % local_path)
            return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                megabytes = round(convert.bytes_to_mb(size_in_bytes))
                start_message = "Downloading data from [%s] (%s MB) to [%s] ... " % (url, megabytes, local_path)
            else:
                start_message = "Downloading data from [%s] to [%s] ... " % (url, local_path)
            # no newline and an explicit flush so the message shows up before the download starts
            console.info(start_message, end='', flush=True, logger=logger)
            net.download(url, local_path, size_in_bytes)
            console.println("[OK]")
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # whatever happened above, the file has to be there now
    if not os.path.isfile(local_path):
        if offline:
            raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (url, local_path, url)
        )

    size_on_disk = os.path.getsize(local_path)
    size_mismatch = size_in_bytes is not None and size_on_disk != size_in_bytes
    if size_mismatch:
        raise exceptions.DataError(
            "[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." % (local_path, size_on_disk, size_in_bytes)
        )
    return True
def from_distribution(version, repo_name, distribution_config, distributions_root, plugins):
    """
    Provides the Elasticsearch tar.gz distribution for ``version`` and resolves plugin download URLs.

    The archive is stored as ``elasticsearch-<version>.tar.gz`` in ``distributions_root``. An existing file is
    reused only if the repository's ``cache`` attribute is truthy.

    :param version: Elasticsearch version to download. Must not be blank.
    :param repo_name: Passed to ``DistributionRepository`` together with ``distribution_config`` and ``version``.
    :param distribution_config: Passed to ``DistributionRepository``.
    :param distributions_root: Directory in which the archive is stored; created if needed.
    :param plugins: Iterable of objects with a ``name`` attribute.
    :return: A dict mapping ``"elasticsearch"`` to the archive path and each plugin name to its download URL
             (only for plugins for which the repository returns a truthy URL).
    :raises exceptions.SystemSetupError: if the version is blank or the download fails with an HTTP error.
    """
    if version.strip() == "":
        raise exceptions.SystemSetupError(
            "Could not determine version. Please specify the Elasticsearch distribution "
            "to download with the command line parameter --distribution-version. "
            "E.g. --distribution-version=5.0.0"
        )

    io.ensure_dir(distributions_root)
    archive_path = "%s/elasticsearch-%s.tar.gz" % (distributions_root, version)

    repo = DistributionRepository(repo_name, distribution_config, version)
    url = repo.download_url
    logger.info("Resolved download URL [%s] for version [%s]" % (url, version))

    # an existing file is only reused if the repository allows caching
    reuse_local_copy = os.path.isfile(archive_path) and repo.cache
    if reuse_local_copy:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, archive_path))
    else:
        try:
            logger.info("Starting download of Elasticsearch [%s]" % version)
            indicator = net.Progress("[INFO] Downloading Elasticsearch %s" % version)
            net.download(url, archive_path, progress_indicator=indicator)
            indicator.finish()
            logger.info("Successfully downloaded Elasticsearch [%s]." % version)
        except urllib.error.HTTPError:
            console.println("[FAILED]")
            logging.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, url))
            raise exceptions.SystemSetupError(
                "Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                "version [%s] is correct." % (url, version)
            )

    binaries = {"elasticsearch": archive_path}
    for plugin in plugins:
        # several configurations of the same plugin simply overwrite the entry with an identical value
        url_for_plugin = repo.plugin_download_url(plugin.name)
        if not url_for_plugin:
            continue
        binaries[plugin.name] = url_for_plugin
    return binaries
def download_corpus(root_url, target_path, size_in_bytes, track_name, offline, test_mode):
    """
    Downloads a corpus file from ``root_url`` to ``target_path`` and verifies its size.

    The download URL is ``root_url`` joined with the base name of ``target_path``.

    :param root_url: Base URL below which the file is expected. Must be truthy.
    :param target_path: Target file path; its parent directory is created if needed.
    :param size_in_bytes: Expected file size in bytes or ``None`` to skip the size check.
    :param track_name: Track name, used in progress and error messages.
    :param offline: If truthy, no download is attempted and an error is raised instead.
    :param test_mode: If truthy, an HTTP 404 is reported as missing test mode support of the track.
    :raises exceptions.DataError: if no ``root_url`` is given, the download fails or the downloaded file does not
                                  have the expected size.
    :raises exceptions.SystemSetupError: if ``offline`` is truthy or the file is missing after the download.
    """
    file_name = os.path.basename(target_path)

    if not root_url:
        raise exceptions.DataError("%s is missing and it cannot be downloaded because no source URL is provided in the track."
                                   % target_path)
    if offline:
        raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % target_path)

    # build the URL from the validated parameter (the original referenced an undefined name here)
    data_url = "%s/%s" % (root_url, file_name)
    try:
        io.ensure_dir(os.path.dirname(target_path))
        if size_in_bytes:
            size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
            logger.info("Downloading data from [%s] (%s MB) to [%s]." % (data_url, size_in_mb, target_path))
        else:
            logger.info("Downloading data from [%s] to [%s]." % (data_url, target_path))

        # we want to have a bit more accurate download progress as these files are typically very large
        progress = net.Progress("[INFO] Downloading data for track %s" % track_name, accuracy=1)
        net.download(data_url, target_path, size_in_bytes, progress_indicator=progress)
        progress.finish()
        logger.info("Downloaded data from [%s] to [%s]." % (data_url, target_path))
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, so this more specific handler has to come first
        if e.code == 404 and test_mode:
            raise exceptions.DataError("Track [%s] does not support test mode. Please ask the track author to add it or "
                                       "disable test mode and retry." % track_name)
        else:
            msg = "Could not download [%s] to [%s]" % (data_url, target_path)
            if e.reason:
                msg += " (HTTP status: %s, reason: %s)" % (str(e.code), e.reason)
            else:
                msg += " (HTTP status: %s)" % str(e.code)
            raise exceptions.DataError(msg)
    except urllib.error.URLError:
        logger.exception("Could not download [%s] to [%s]." % (data_url, target_path))
        raise exceptions.DataError("Could not download [%s] to [%s]." % (data_url, target_path))

    if not os.path.isfile(target_path):
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (data_url, target_path, data_url))

    actual_size = os.path.getsize(target_path)
    if size_in_bytes is not None and actual_size != size_in_bytes:
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (target_path, actual_size, size_in_bytes))
def from_distribution(version, repo_name, distributions_root):
    """
    Provides the Elasticsearch tar.gz distribution for ``version`` from the repository ``repo_name``.

    The archive is stored as ``elasticsearch-<version>.tar.gz`` in ``distributions_root``. An existing file is
    reused unless the repository's ``must_download`` is truthy.

    :param version: Elasticsearch version to download. Must not be blank.
    :param repo_name: Key into ``distribution_repos``.
    :param distributions_root: Directory in which the archive is stored; created if needed.
    :return: The path of the distribution archive.
    :raises exceptions.SystemSetupError: if the version is blank, the repository name is unknown or the download
                                         fails with an HTTP error.
    """
    if version.strip() == "":
        raise exceptions.SystemSetupError(
            "Could not determine version. Please specify the Elasticsearch distribution "
            "to download with the command line parameter --distribution-version. "
            "E.g. --distribution-version=5.0.0"
        )

    io.ensure_dir(distributions_root)
    archive_path = "%s/elasticsearch-%s.tar.gz" % (distributions_root, version)

    try:
        repo = distribution_repos[repo_name]
    except KeyError:
        valid_names = ",".join(distribution_repos.keys())
        raise exceptions.SystemSetupError("Unknown distribution repository [%s]. Valid values are: [%s]" % (repo_name, valid_names))

    url = repo.download_url(version)
    logger.info("Resolved download URL [%s] for version [%s]" % (url, version))

    # reuse a local archive only if the repository does not force a fresh download
    reuse_local_copy = os.path.isfile(archive_path) and not repo.must_download
    if reuse_local_copy:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, archive_path))
        return archive_path

    try:
        logger.info("Starting download of Elasticsearch [%s]" % version)
        indicator = net.Progress("[INFO] Downloading Elasticsearch %s" % version)
        net.download(url, archive_path, progress_indicator=indicator)
        indicator.finish()
        logger.info("Successfully downloaded Elasticsearch [%s]." % version)
    except urllib.error.HTTPError:
        console.println("[FAILED]")
        logging.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, url))
        raise exceptions.SystemSetupError(
            "Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
            "version [%s] is correct." % (url, version)
        )
    return archive_path
def download(cfg, url, local_path, size_in_bytes):
    """
    Fetches ``url`` into ``local_path`` if no local file of the expected size exists yet.

    The download step is skipped when the config option ``system``/``offline.mode`` is truthy. A URL error
    raised while downloading is only logged; afterwards the file's existence and size are checked.

    :param cfg: Config object providing ``opts(section, key)``.
    :param url: Source URL.
    :param local_path: Target file path; its parent directory is created if needed.
    :param size_in_bytes: Expected file size in bytes or ``None`` to skip all size checks.
    :return: ``False`` if the existing local file was accepted, ``True`` if execution got past the download step.
    :raises exceptions.SystemSetupError: if the file is not present after the download step.
    :raises exceptions.DataError: if the file's size differs from ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")
    if os.path.isfile(local_path):
        # only skip the download if the size also matches (when a size is known)
        if size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes:
            logger.info("[%s] already exists locally. Skipping download." % local_path)
            return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                megabytes = round(convert.bytes_to_mb(size_in_bytes))
                announcement = "Downloading data from [%s] (%s MB) to [%s]." % (url, megabytes, local_path)
            else:
                announcement = "Downloading data from [%s] to [%s]." % (url, local_path)
            logger.info(announcement)

            # finer-grained progress because these files are typically very large
            # NOTE(review): `track` is not a parameter of this function; presumably provided by an enclosing scope.
            indicator = net.Progress("[INFO] Downloading data for track %s" % track.name, accuracy=1)
            net.download(url, local_path, size_in_bytes, progress_indicator=indicator)
            indicator.finish()
            logger.info("Downloaded data from [%s] to [%s]." % (url, local_path))
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # the file must exist at this point
    if not os.path.isfile(local_path):
        if offline:
            raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (url, local_path, url)
        )

    size_on_disk = os.path.getsize(local_path)
    if size_in_bytes is None or size_on_disk == size_in_bytes:
        return True
    raise exceptions.DataError(
        "[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." % (local_path, size_on_disk, size_in_bytes)
    )
def download(cfg, url, local_path, size_in_bytes):
    """
    Fetches ``url`` into ``local_path`` if no local file of the expected size exists yet.

    The download step is skipped when the config option ``system``/``offline.mode`` is truthy. A URL error
    raised while downloading is only logged; afterwards the file's existence and size are checked.

    :param cfg: Config object providing ``opts(section, key)``.
    :param url: Source URL.
    :param local_path: Target file path; its parent directory is created if needed.
    :param size_in_bytes: Expected file size in bytes or ``None`` to skip all size checks.
    :return: ``False`` if the existing local file was accepted, ``True`` if execution got past the download step.
    :raises exceptions.SystemSetupError: if the file is not present after the download step.
    :raises exceptions.DataError: if the file's size differs from ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")
    if os.path.isfile(local_path):
        # only skip the download if the size also matches (when a size is known)
        if size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes:
            logger.info("[%s] already exists locally. Skipping download." % local_path)
            return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                megabytes = round(convert.bytes_to_mb(size_in_bytes))
                announcement = "Downloading data from [%s] (%s MB) to [%s] ... " % (url, megabytes, local_path)
            else:
                announcement = "Downloading data from [%s] to [%s] ... " % (url, local_path)
            # printed without newline and flushed so it is visible while the download runs
            console.info(announcement, end='', flush=True, logger=logger)
            net.download(url, local_path, size_in_bytes)
            console.println("[OK]")
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # the file must exist at this point
    if not os.path.isfile(local_path):
        if offline:
            raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (url, local_path, url)
        )

    size_on_disk = os.path.getsize(local_path)
    if size_in_bytes is None or size_on_disk == size_in_bytes:
        return True
    raise exceptions.DataError(
        "[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." % (local_path, size_on_disk, size_in_bytes)
    )
def _download(self, url, local_path, size_in_bytes=None, force_download=False, raise_url_error=False):
    """
    Downloads ``url`` to ``local_path`` via http(s) or s3.

    An existing local file is kept unless ``force_download`` is set. No download is attempted if the config
    option ``system``/``offline.mode`` is truthy. After the download step the file's presence is verified.

    :param url: Source URL. URLs starting with ``http`` or ``s3`` are supported.
    :param local_path: Target file path; its parent directory is created if needed.
    :param size_in_bytes: Optional size of the download. If truthy, progress messages are printed to the console.
    :param force_download: If ``True``, download even if ``local_path`` already exists.
    :param raise_url_error: If ``True``, a URL error is re-raised after it has been logged.
    :raises exceptions.SystemSetupError: if the URL scheme is unsupported or the file is not present after the
                                         download step.
    """
    offline = self._config.opts("system", "offline.mode")
    already_present = os.path.isfile(local_path)
    if already_present and not force_download:
        logger.info("[%s] already exists locally. Skipping download." % local_path)
        return

    if not offline:
        logger.info("Downloading from [%s] to [%s]." % (url, local_path))
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                megabytes = round(convert.bytes_to_mb(size_in_bytes))
                # no newline and an explicit flush so the message shows up before the download starts
                print("Downloading data from %s (%s MB) ... " % (url, megabytes), end='', flush=True)

            if url.startswith("http"):
                net.download(url, local_path)
            elif url.startswith("s3"):
                self._do_download_via_s3(url, local_path, size_in_bytes)
            else:
                raise exceptions.SystemSetupError("Cannot download benchmark data from [%s]. Only http(s) and s3 are supported." % url)

            if size_in_bytes:
                print("Done")
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))
            if raise_url_error:
                raise

    # whatever happened above, the file has to be there now
    if os.path.isfile(local_path):
        return
    if offline:
        raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % local_path)
    raise exceptions.SystemSetupError(
        "Could not download from %s to %s. Please verify that data are available at %s and "
        "check your internet connection." % (url, local_path, url)
    )
def from_distribution(version, repo_name, distribution_config, distributions_root, plugins):
    """
    Downloads the Elasticsearch tar.gz distribution for ``version`` if necessary and collects plugin URLs.

    The archive lives at ``<distributions_root>/elasticsearch-<version>.tar.gz``. It is downloaded unless the
    file already exists and the repository's ``cache`` attribute is truthy.

    :param version: Elasticsearch version to download. Must not be blank.
    :param repo_name: Passed to ``DistributionRepository`` together with ``distribution_config`` and ``version``.
    :param distribution_config: Passed to ``DistributionRepository``.
    :param distributions_root: Directory in which the archive is stored; created if needed.
    :param plugins: Iterable of objects with a ``name`` attribute.
    :return: A dict mapping ``"elasticsearch"`` to the archive path and each plugin name to its download URL
             (only for plugins for which the repository returns a truthy URL).
    :raises exceptions.SystemSetupError: if the version is blank or the download fails with an HTTP error.
    """
    if version.strip() == "":
        raise exceptions.SystemSetupError(
            "Could not determine version. Please specify the Elasticsearch distribution "
            "to download with the command line parameter --distribution-version. "
            "E.g. --distribution-version=5.0.0"
        )

    io.ensure_dir(distributions_root)
    local_archive = "%s/elasticsearch-%s.tar.gz" % (distributions_root, version)

    repository = DistributionRepository(repo_name, distribution_config, version)
    source_url = repository.download_url
    logger.info("Resolved download URL [%s] for version [%s]" % (source_url, version))

    # a cached copy may only be used if it exists and the repository permits caching
    can_use_cached_copy = os.path.isfile(local_archive) and repository.cache
    if can_use_cached_copy:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, local_archive))
    else:
        try:
            logger.info("Starting download of Elasticsearch [%s]" % version)
            progress_bar = net.Progress("[INFO] Downloading Elasticsearch %s" % version)
            net.download(source_url, local_archive, progress_indicator=progress_bar)
            progress_bar.finish()
            logger.info("Successfully downloaded Elasticsearch [%s]." % version)
        except urllib.error.HTTPError:
            console.println("[FAILED]")
            logging.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, source_url))
            raise exceptions.SystemSetupError(
                "Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                "version [%s] is correct." % (source_url, version)
            )

    result = {"elasticsearch": local_archive}
    for plugin in plugins:
        # repeated plugin names overwrite their own entry with the same value, which is harmless
        resolved_url = repository.plugin_download_url(plugin.name)
        if resolved_url:
            result[plugin.name] = resolved_url
    return result