def fetch(self):
    """
    Makes the Elasticsearch distribution archive available below ``self.distributions_root``.

    The archive is downloaded from ``self.repo.download_url`` unless a file named ``self.repo.file_name`` already
    exists in the distributions root and ``self.repo.cache`` is truthy. After a successful run the local path of the
    archive is stored in ``self.distribution_path``.

    :raises exceptions.SystemSetupError: If the download fails with an HTTP error.
    """
    io.ensure_dir(self.distributions_root)
    source_url = self.repo.download_url
    target_path = os.path.join(self.distributions_root, self.repo.file_name)
    self.logger.info("Resolved download URL [%s] for version [%s]", source_url, self.version)

    # an already present binary is only reused if the repository allows caching
    reuse_existing = os.path.isfile(target_path) and self.repo.cache
    if reuse_existing:
        self.logger.info(
            "Skipping download for version [%s]. Found an existing binary at [%s].", self.version, target_path)
    else:
        try:
            self.logger.info("Starting download of Elasticsearch [%s]", self.version)
            indicator = net.Progress("[INFO] Downloading Elasticsearch %s" % self.version)
            net.download(source_url, target_path, progress_indicator=indicator)
            indicator.finish()
            self.logger.info("Successfully downloaded Elasticsearch [%s].", self.version)
        except urllib.error.HTTPError:
            self.logger.exception(
                "Cannot download Elasticsearch distribution for version [%s] from [%s].", self.version, source_url)
            failure = ("Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                       "version [%s] is correct." % (source_url, self.version))
            raise exceptions.SystemSetupError(failure)

    # only reached if the archive was reused or downloaded without an HTTP error
    self.distribution_path = target_path
def test_progress(self):
    """
    Verifies that ``net.Progress`` prints when invoked with a concrete second argument as well as with ``None``
    (presumably the total size, i.e. the "unknown size" case - confirm against ``net.Progress``).
    """
    indicator = net.Progress("test")
    printer = mock.Mock()
    # swap the indicator's printer for a mock so that print calls can be observed
    indicator.p = printer

    # second argument given
    indicator(42, 100)
    assert printer.print.called

    # second argument missing (None); start from a clean mock so the earlier call does not satisfy the assertion
    printer.reset_mock()
    indicator(42, None)
    assert printer.print.called
def download(cfg, url, local_path, size_in_bytes):
    """
    Downloads ``url`` to ``local_path`` unless a suitable local file is already present.

    :param cfg: Config object. Only the setting "offline.mode" in section "system" is read.
    :param url: The URL to download from.
    :param local_path: The path of the local target file.
    :param size_in_bytes: The expected file size in bytes or ``None`` to skip all size checks.
    :return: ``False`` if an existing local file was reused, ``True`` if the file is present after running the
             download logic.
    :raises exceptions.SystemSetupError: If the file does not exist locally after the download attempt (or in
            offline mode).
    :raises exceptions.DataError: If the size of the local file does not match ``size_in_bytes``.
    """
    offline = cfg.opts("system", "offline.mode")

    # reuse an existing file only if no size is expected or if its size matches the expectation
    if os.path.isfile(local_path):
        if size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes:
            logger.info("[%s] already exists locally. Skipping download." % local_path)
            return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
                announcement = "Downloading data from [%s] (%s MB) to [%s]." % (url, size_in_mb, local_path)
            else:
                announcement = "Downloading data from [%s] to [%s]." % (url, local_path)
            logger.info(announcement)

            # These files are typically very large, hence request a more accurate progress indication.
            # NOTE(review): `track` is not a parameter of this function; it has to be provided by an enclosing or
            # module scope that is not visible here - confirm.
            indicator = net.Progress("[INFO] Downloading data for track %s" % track.name, accuracy=1)
            net.download(url, local_path, size_in_bytes, progress_indicator=indicator)
            indicator.finish()
            logger.info("Downloaded data from [%s] to [%s]." % (url, local_path))
        except urllib.error.URLError:
            # not re-raised on purpose: the checks below detect a missing file
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # the file has to exist at this point
    if not os.path.isfile(local_path):
        if offline:
            problem = "Cannot find %s. Please disable offline mode and retry again." % local_path
        else:
            problem = ("Cannot download from %s to %s. Please verify that data are available at %s and "
                       "check your internet connection." % (url, local_path, url))
        raise exceptions.SystemSetupError(problem)

    actual_size = os.path.getsize(local_path)
    size_mismatch = size_in_bytes is not None and actual_size != size_in_bytes
    if size_mismatch:
        raise exceptions.DataError(
            "[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." % (local_path, actual_size, size_in_bytes))
    return True
def from_distribution(version, repo_name, distribution_config, distributions_root, plugins):
    """
    Resolves the binaries for an Elasticsearch distribution and the given plugins.

    The Elasticsearch archive is downloaded to ``distributions_root`` unless it is already present there and the
    repository's ``cache`` attribute is truthy.

    :param version: The Elasticsearch version to download. Must not be blank.
    :param repo_name: Name of the distribution repository (passed to ``DistributionRepository``).
    :param distribution_config: Distribution configuration (passed to ``DistributionRepository``).
    :param distributions_root: Directory in which the downloaded archive is stored. Created if necessary.
    :param plugins: Iterable of plugin objects; only their ``name`` attribute is read.
    :return: A dict mapping "elasticsearch" to the local archive path and the name of each plugin for which the
             repository provides a download URL to that URL.
    :raises exceptions.SystemSetupError: If ``version`` is blank or the download fails with an HTTP error.
    """
    if version.strip() == "":
        raise exceptions.SystemSetupError(
            "Could not determine version. Please specify the Elasticsearch distribution "
            "to download with the command line parameter --distribution-version. "
            "E.g. --distribution-version=5.0.0")
    io.ensure_dir(distributions_root)
    distribution_path = "%s/elasticsearch-%s.tar.gz" % (distributions_root, version)

    repo = DistributionRepository(repo_name, distribution_config, version)

    download_url = repo.download_url
    logger.info("Resolved download URL [%s] for version [%s]", download_url, version)
    if not os.path.isfile(distribution_path) or not repo.cache:
        try:
            logger.info("Starting download of Elasticsearch [%s]", version)
            progress = net.Progress("[INFO] Downloading Elasticsearch %s" % version)
            net.download(download_url, distribution_path, progress_indicator=progress)
            progress.finish()
            logger.info("Successfully downloaded Elasticsearch [%s].", version)
        except urllib.error.HTTPError:
            console.println("[FAILED]")
            # log via the module logger (not the root logger) so the record is attributed like all other messages
            logger.exception(
                "Cannot download Elasticsearch distribution for version [%s] from [%s].", version, download_url)
            raise exceptions.SystemSetupError(
                "Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                "version [%s] is correct." % (download_url, version))
    else:
        logger.info(
            "Skipping download for version [%s]. Found an existing binary locally at [%s].", version, distribution_path)

    binaries = {"elasticsearch": distribution_path}
    for plugin in plugins:
        # if we have multiple plugin configurations for a plugin we will override entries here but as this is always
        # the same key-value pair this is ok.
        plugin_url = repo.plugin_download_url(plugin.name)
        if plugin_url:
            binaries[plugin.name] = plugin_url

    return binaries
def download_corpus(root_url, target_path, size_in_bytes, track_name, offline, test_mode):
    """
    Downloads a corpus file from ``root_url`` to ``target_path`` and verifies its size.

    The remote file is expected at ``<root_url>/<base name of target_path>``.

    :param root_url: The base URL to download from. Must not be empty.
    :param target_path: The local path of the file to download. Its directory is created if necessary.
    :param size_in_bytes: The expected file size in bytes or ``None`` to skip the size check.
    :param track_name: Name of the track; only used in progress and error messages.
    :param offline: If truthy, no download is attempted and an error is raised instead.
    :param test_mode: If truthy, an HTTP 404 is reported as missing test mode support of the track.
    :raises exceptions.DataError: If no root URL is given, the download fails or the downloaded file has an
            unexpected size.
    :raises exceptions.SystemSetupError: In offline mode or if the file does not exist after the download.
    """
    file_name = os.path.basename(target_path)

    if not root_url:
        raise exceptions.DataError("%s is missing and it cannot be downloaded because no source URL is provided in the track."
                                   % target_path)
    if offline:
        raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry again." % target_path)

    # build the URL from the validated `root_url` parameter
    data_url = "%s/%s" % (root_url, file_name)
    try:
        io.ensure_dir(os.path.dirname(target_path))
        if size_in_bytes:
            size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
            logger.info("Downloading data from [%s] (%s MB) to [%s].", data_url, size_in_mb, target_path)
        else:
            logger.info("Downloading data from [%s] to [%s].", data_url, target_path)

        # we want to have a bit more accurate download progress as these files are typically very large
        progress = net.Progress("[INFO] Downloading data for track %s" % track_name, accuracy=1)
        net.download(data_url, target_path, size_in_bytes, progress_indicator=progress)
        progress.finish()
        logger.info("Downloaded data from [%s] to [%s].", data_url, target_path)
    except urllib.error.HTTPError as e:
        # HTTPError is a subclass of URLError, hence it has to be handled first
        if e.code == 404 and test_mode:
            raise exceptions.DataError("Track [%s] does not support test mode. Please ask the track author to add it or "
                                       "disable test mode and retry." % track_name)
        else:
            msg = "Could not download [%s] to [%s]" % (data_url, target_path)
            if e.reason:
                msg += " (HTTP status: %s, reason: %s)" % (str(e.code), e.reason)
            else:
                msg += " (HTTP status: %s)" % str(e.code)
            raise exceptions.DataError(msg)
    except urllib.error.URLError:
        logger.exception("Could not download [%s] to [%s].", data_url, target_path)
        raise exceptions.DataError("Could not download [%s] to [%s]." % (data_url, target_path))

    if not os.path.isfile(target_path):
        raise exceptions.SystemSetupError(
            "Cannot download from %s to %s. Please verify that data are available at %s and "
            "check your internet connection." % (data_url, target_path, data_url))

    actual_size = os.path.getsize(target_path)
    if size_in_bytes is not None and actual_size != size_in_bytes:
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (target_path, actual_size, size_in_bytes))
def from_distribution(version, repo_name, distributions_root):
    """
    Provides the Elasticsearch distribution archive for the given version.

    The archive is downloaded to ``distributions_root`` unless it is already present there and the repository's
    ``must_download`` attribute is falsy.

    :param version: The Elasticsearch version to download. Must not be blank.
    :param repo_name: Name of the distribution repository; has to be a key of ``distribution_repos``.
    :param distributions_root: Directory in which the downloaded archive is stored. Created if necessary.
    :return: The local path of the distribution archive.
    :raises exceptions.SystemSetupError: If ``version`` is blank, the repository is unknown or the download fails
            with an HTTP error.
    """
    if version.strip() == "":
        raise exceptions.SystemSetupError(
            "Could not determine version. Please specify the Elasticsearch distribution "
            "to download with the command line parameter --distribution-version. "
            "E.g. --distribution-version=5.0.0")
    io.ensure_dir(distributions_root)
    distribution_path = "%s/elasticsearch-%s.tar.gz" % (distributions_root, version)

    try:
        repo = distribution_repos[repo_name]
    except KeyError:
        raise exceptions.SystemSetupError(
            "Unknown distribution repository [%s]. Valid values are: [%s]" % (repo_name, ",".join(distribution_repos.keys())))

    download_url = repo.download_url(version)
    logger.info("Resolved download URL [%s] for version [%s]", download_url, version)
    if not os.path.isfile(distribution_path) or repo.must_download:
        try:
            logger.info("Starting download of Elasticsearch [%s]", version)
            progress = net.Progress("[INFO] Downloading Elasticsearch %s" % version)
            net.download(download_url, distribution_path, progress_indicator=progress)
            progress.finish()
            logger.info("Successfully downloaded Elasticsearch [%s].", version)
        except urllib.error.HTTPError:
            console.println("[FAILED]")
            # log via the module logger (not the root logger) so the record is attributed like all other messages
            logger.exception(
                "Cannot download Elasticsearch distribution for version [%s] from [%s].", version, download_url)
            raise exceptions.SystemSetupError(
                "Cannot download Elasticsearch distribution from [%s]. Please check that the specified "
                "version [%s] is correct." % (download_url, version))
    else:
        logger.info(
            "Skipping download for version [%s]. Found an existing binary locally at [%s].", version, distribution_path)
    return distribution_path