def before_lap(self):
    self.current_lap += 1
    logger.info("Starting lap [%d/%d]" % (self.current_lap, self.race.total_laps))
    if self.race.total_laps > 1:
        msg = "Lap [%d/%d]" % (self.current_lap, self.race.total_laps)
        console.println(console.format.bold(msg))
        console.println(console.format.underline_for(msg))
def prepare_file_offset_table(data_file_path):
    """
    Creates a file that contains a mapping from line numbers to file offsets for the provided path. This file is used
    internally by #skip_lines(data_file_path, data_file) to speed up line skipping.

    :param data_file_path: The path to a text file that is readable by this process.
    :return: The number of lines read or ``None`` if it did not have to build the file offset table.
    """
    offset_file_path = "%s.offset" % data_file_path
    # recreate only if necessary as this can be time-consuming
    if not os.path.exists(offset_file_path) or os.path.getmtime(offset_file_path) < os.path.getmtime(data_file_path):
        console.info("Preparing file offset table for [%s] ... " % data_file_path, end="", flush=True, logger=logger)
        line_number = 0
        with open(offset_file_path, mode="w") as offset_file:
            with open(data_file_path, mode="rt") as data_file:
                while True:
                    line = data_file.readline()
                    if len(line) == 0:
                        break
                    line_number += 1
                    if line_number % 50000 == 0:
                        print("%d;%d" % (line_number, data_file.tell()), file=offset_file)
        console.println("[OK]")
        return line_number
    else:
        logger.info("Skipping creation of file offset table at [%s] as it is still valid." % offset_file_path)
        return None
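# The offset file written above holds "line_number;offset" pairs, one entry every 50,000 lines.
# A minimal sketch (an assumption for illustration, not the original #skip_lines implementation)
# of how such a table can be consumed: seek to the closest recorded offset at or before the
# target line, then read the few remaining lines that the coarse-grained table cannot cover.
import os

def skip_lines_sketch(data_file_path, data_file, number_of_lines_to_skip):
    prior_line, prior_offset = 0, 0
    offset_file_path = "%s.offset" % data_file_path
    if os.path.exists(offset_file_path):
        with open(offset_file_path, mode="rt") as offset_file:
            for entry in offset_file:
                line_number, offset = (int(i) for i in entry.strip().split(";"))
                if line_number > number_of_lines_to_skip:
                    break
                prior_line, prior_offset = line_number, offset
    data_file.seek(prior_offset)
    # skip whatever remains between the last table entry and the target line
    for _ in range(number_of_lines_to_skip - prior_line):
        data_file.readline()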
def from_distribution(cfg):
    version = cfg.opts("source", "distribution.version")
    repo_name = cfg.opts("source", "distribution.repository")
    if version.strip() == "":
        raise exceptions.SystemSetupError("Could not determine version. Please specify the Elasticsearch distribution "
                                          "to download with the command line parameter --distribution-version. "
                                          "E.g. --distribution-version=5.0.0")
    distributions_root = "%s/%s" % (cfg.opts("system", "root.dir"), cfg.opts("source", "distribution.dir"))
    io.ensure_dir(distributions_root)
    distribution_path = "%s/elasticsearch-%s.tar.gz" % (distributions_root, version)

    try:
        repo = distribution_repos[repo_name]
    except KeyError:
        raise exceptions.SystemSetupError("Unknown distribution repository [%s]. Valid values are: [%s]"
                                          % (repo_name, ",".join(distribution_repos.keys())))

    download_url = repo.download_url(version)
    logger.info("Resolved download URL [%s] for version [%s]" % (download_url, version))
    if not os.path.isfile(distribution_path) or repo.must_download:
        try:
            console.info("Downloading Elasticsearch %s ... " % version, logger=logger, flush=True, end="")
            net.download(download_url, distribution_path)
            console.println("[OK]")
        except urllib.error.HTTPError:
            console.println("[FAILED]")
            logging.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, download_url))
            raise exceptions.SystemSetupError("Cannot download Elasticsearch distribution from [%s]. Please check that the "
                                              "specified version [%s] is correct." % (download_url, version))
    else:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, distribution_path))

    cfg.add(config.Scope.invocation, "builder", "candidate.bin.path", distribution_path)
def start(self, car=None):
    console.println(ExternalLauncher.BOGUS_RESULTS_WARNING)

    hosts = self.cfg.opts("launcher", "external.target.hosts")
    client_options = self.cfg.opts("launcher", "client.options")
    # unified client config
    self.cfg.add(config.Scope.benchmark, "client", "hosts", hosts)
    self.cfg.add(config.Scope.benchmark, "client", "options", client_options)

    es = self.client_factory(hosts, client_options).create()
    t = telemetry.Telemetry(self.cfg, devices=[
        telemetry.ExternalEnvironmentInfo(self.cfg, es, self.metrics_store),
        telemetry.NodeStats(self.cfg, es, self.metrics_store),
        telemetry.IndexStats(self.cfg, es, self.metrics_store)
    ])
    c = cluster.Cluster([], t)
    user_defined_version = self.cfg.opts("source", "distribution.version", mandatory=False)
    distribution_version = es.info()["version"]["number"]
    if not user_defined_version or user_defined_version.strip() == "":
        logger.info("Distribution version was not specified by user. Rally-determined version is [%s]" % distribution_version)
        self.cfg.add(config.Scope.benchmark, "source", "distribution.version", distribution_version)
    elif user_defined_version != distribution_version:
        console.println(
            "Warning: Specified distribution version '%s' on the command line differs from version '%s' reported by the cluster."
            % (user_defined_version, distribution_version), logger=logger.warn)
    t.attach_to_cluster(c)
    return c
def print_race_info(cfg):
    track_name = cfg.opts("benchmarks", "track")
    challenge_name = cfg.opts("benchmarks", "challenge")
    selected_car_name = cfg.opts("benchmarks", "car")
    console.info("Racing on track [%s], challenge [%s] and car [%s]" % (track_name, challenge_name, selected_car_name))
    # just ensure it is optically separated
    console.println("")
def decompress(data_set_path, expected_size_in_bytes):
    # we assume that track data are always compressed and try to decompress them before running the benchmark
    basename, extension = io.splitext(data_set_path)
    decompressed = False

    if not os.path.isfile(basename) or os.path.getsize(basename) != expected_size_in_bytes:
        decompressed = True
        # expected_size_in_bytes is the uncompressed size (it is compared against the extracted size below)
        if expected_size_in_bytes:
            console.info("Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... " %
                         (data_set_path, basename, convert.bytes_to_gb(expected_size_in_bytes)),
                         end='', flush=True, logger=logger)
        else:
            console.info("Decompressing track data from [%s] to [%s] ... " % (data_set_path, basename),
                         end='', flush=True, logger=logger)

        io.decompress(data_set_path, io.dirname(data_set_path))
        console.println("[OK]")
        extracted_bytes = os.path.getsize(basename)
        if expected_size_in_bytes is not None and extracted_bytes != expected_size_in_bytes:
            raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                       (basename, extracted_bytes, expected_size_in_bytes))
    return basename, decompressed
def before_lap(self):
    self.current_lap += 1
    self.logger.info("Starting lap [%d/%d]", self.current_lap, self.race.total_laps)
    if self.race.total_laps > 1:
        msg = "Lap [%d/%d]" % (self.current_lap, self.race.total_laps)
        console.println(console.format.bold(msg))
        console.println(console.format.underline_for(msg))
def dispatch_sub_command(cfg, sub_command):
    try:
        if sub_command == "compare":
            reporter.compare(cfg)
        elif sub_command == "list":
            list(cfg)
        elif sub_command == "race":
            race(cfg)
        else:
            raise exceptions.SystemSetupError("Unknown subcommand [%s]" % sub_command)
        return True
    except exceptions.RallyError as e:
        logging.exception("Cannot run subcommand [%s]." % sub_command)
        console.error("Cannot %s. %s" % (sub_command, e))
        console.println("")
        print_help_on_errors()
        return False
    except BaseException as e:
        logging.exception("A fatal error occurred while running subcommand [%s]." % sub_command)
        console.error("Cannot %s. %s." % (sub_command, e))
        console.println("")
        print_help_on_errors()
        return False
def prepare_file_offset_table(data_file_path):
    """
    Creates a file that contains a mapping from line numbers to file offsets for the provided path. This file is used
    internally by #skip_lines(data_file_path, data_file) to speed up line skipping.

    :param data_file_path: The path to a text file that is readable by this process.
    :return: The number of lines read or ``None`` if it did not have to build the file offset table.
    """
    offset_file_path = "%s.offset" % data_file_path
    # recreate only if necessary as this can be time-consuming
    if not os.path.exists(offset_file_path) or os.path.getmtime(offset_file_path) < os.path.getmtime(data_file_path):
        console.info("Preparing file offset table for [%s] ... " % data_file_path, end="", flush=True)
        line_number = 0
        with open(offset_file_path, mode="wt", encoding="utf-8") as offset_file:
            with open(data_file_path, mode="rt", encoding="utf-8") as data_file:
                while True:
                    line = data_file.readline()
                    if len(line) == 0:
                        break
                    line_number += 1
                    if line_number % 50000 == 0:
                        print("%d;%d" % (line_number, data_file.tell()), file=offset_file)
        console.println("[OK]")
        return line_number
    else:
        return None
def stop(raise_errors=True):
    if actor.actor_system_already_running():
        try:
            # Thespian writes the following warning upon start (at least) on Mac OS X:
            #
            # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,
            # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
            #
            # Therefore, we will not show warnings but only errors.
            logging.basicConfig(level=logging.ERROR)
            running_system = actor.bootstrap_actor_system(try_join=True)
            running_system.shutdown()
            # await termination...
            console.info("Shutting down actor system.", end="", flush=True)
            while actor.actor_system_already_running():
                console.println(".", end="", flush=True)
                time.sleep(1)
            console.println(" [OK]")
        except BaseException:
            console.error("Could not shut down actor system.")
            if raise_errors:
                # raise again so user can see the error
                raise
    elif raise_errors:
        console.error("Could not shut down actor system: Actor system is not running.")
        sys.exit(1)
def test_println_force_prints_even_when_quiet(self, patched_print, patched_isatty):
    console.init(quiet=True)
    patched_isatty.return_value = random.choice([True, False])
    console.println(msg="Unittest message", force=True)
    patched_print.assert_called_once_with("Unittest message", end="\n", flush=False)
def test_println_randomized_assume_tty_or_istty_and_isnotquiet(self, patched_print, patched_isatty):
    random_boolean = random.choice([True, False])
    console.init(quiet=False, assume_tty=not random_boolean)
    patched_isatty.return_value = random_boolean
    console.println(msg="Unittest message")
    patched_print.assert_called_once_with("Unittest message", end="\n", flush=False)
def test_println_isquiet_and_randomized_docker_assume_tty_or_istty(self, patched_print, patched_isatty):
    random_boolean = random.choice([True, False])
    console.init(quiet=True, assume_tty=not random_boolean)
    patched_isatty.return_value = random_boolean
    console.RALLY_RUNNING_IN_DOCKER = not random_boolean
    console.println(msg="Unittest message")
    patched_print.assert_not_called()
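# A minimal sketch (an assumption, not Rally's actual console module) of a println that
# satisfies the three tests above: force overrides quiet mode, quiet mode suppresses all
# other output, and non-quiet output requires a TTY, assume_tty, or a Docker environment.
import sys

QUIET = False
ASSUME_TTY = True
RALLY_RUNNING_IN_DOCKER = False

def println_sketch(msg, end="\n", flush=False, force=False):
    if force or (not QUIET and (ASSUME_TTY or RALLY_RUNNING_IN_DOCKER or sys.stdout.isatty())):
        print(msg, end=end, flush=flush)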
def list_tracks(cfg):
    console.println("Available tracks:\n")
    console.println(tabulate.tabulate(
        tabular_data=[
            [t.name, t.short_description, t.number_of_documents,
             convert.bytes_to_human_string(t.compressed_size_in_bytes),
             convert.bytes_to_human_string(t.uncompressed_size_in_bytes),
             t.default_challenge, ",".join(map(str, t.challenges))]
            for t in tracks(cfg)
        ],
        headers=["Name", "Description", "Documents", "Compressed Size", "Uncompressed Size", "Default Challenge", "All Challenges"]))
def list_tracks(cfg):
    console.println("Available tracks:\n")
    console.println(tabulate.tabulate(
        tabular_data=[
            [t.name, t.description, t.number_of_documents,
             convert.bytes_to_human_string(t.compressed_size_in_bytes),
             convert.bytes_to_human_string(t.uncompressed_size_in_bytes),
             t.default_challenge, ",".join(map(str, t.challenges))]
            for t in tracks(cfg)
        ],
        headers=["Name", "Description", "Documents", "Compressed Size", "Uncompressed Size", "Default Challenge", "All Challenges"]))
def _try_init(self, may_skip_init=False):
    if not git.is_working_copy(self.src_dir):
        if self.has_remote():
            console.println("Downloading sources for %s from %s to %s." % (self.name, self.remote_url, self.src_dir))
            git.clone(self.src_dir, self.remote_url)
        elif os.path.isdir(self.src_dir) and may_skip_init:
            logger.info("Skipping repository initialization for %s." % self.name)
        else:
            raise exceptions.SystemSetupError("A remote repository URL is mandatory for %s" % self.name)
def list_cars(cfg):
    loader = CarLoader(team_repo(cfg))
    cars = []
    for name in loader.car_names():
        cars.append(loader.load_car(name))
    # first by type, then by name (we need to run the sort in reverse for that)
    # idiomatic way according to https://docs.python.org/3/howto/sorting.html#sort-stability-and-complex-sorts
    cars = sorted(sorted(cars, key=lambda c: c.name), key=lambda c: c.type)
    console.println("Available cars:\n")
    console.println(tabulate.tabulate([[c.name, c.type, c.description] for c in cars], headers=["Name", "Type", "Description"]))
def list_cars(cfg):
    loader = CarLoader(team_path(cfg))
    cars = []
    for name in loader.car_names():
        cars.append(loader.load_car(name))
    # first by type, then by name (we need to run the sort in reverse for that)
    # idiomatic way according to https://docs.python.org/3/howto/sorting.html#sort-stability-and-complex-sorts
    cars = sorted(sorted(cars, key=lambda c: c.name), key=lambda c: c.type)
    console.println("Available cars:\n")
    console.println(tabulate.tabulate([[c.name, c.type, c.description] for c in cars], headers=["Name", "Type", "Description"]))
def download(cfg):
    car, plugins = load_team(cfg, external=False)

    s = supplier.create(cfg, sources=False, distribution=True, build=False, car=car, plugins=plugins)
    binaries = s()
    console.println(json.dumps(binaries, indent=2), force=True)
def download(cfg, url, local_path, size_in_bytes):
    offline = cfg.opts("system", "offline.mode")
    file_exists = os.path.isfile(local_path)

    # ensure we only skip the download if the file size also matches our expectation
    if file_exists and (size_in_bytes is None or os.path.getsize(local_path) == size_in_bytes):
        logger.info("[%s] already exists locally. Skipping download." % local_path)
        return False

    if not offline:
        try:
            io.ensure_dir(os.path.dirname(local_path))
            if size_in_bytes:
                size_in_mb = round(convert.bytes_to_mb(size_in_bytes))
                # ensure output appears immediately
                console.info("Downloading data from [%s] (%s MB) to [%s] ... " % (url, size_in_mb, local_path),
                             end='', flush=True, logger=logger)
            else:
                console.info("Downloading data from [%s] to [%s] ... " % (url, local_path), end='', flush=True, logger=logger)
            net.download(url, local_path, size_in_bytes)
            console.println("[OK]")
        except urllib.error.URLError:
            logger.exception("Could not download [%s] to [%s]." % (url, local_path))

    # file must exist at this point -> verify
    if not os.path.isfile(local_path):
        if offline:
            raise exceptions.SystemSetupError("Cannot find %s. Please disable offline mode and retry." % local_path)
        else:
            raise exceptions.SystemSetupError("Cannot download from %s to %s. Please verify that data are available at %s and "
                                              "check your internet connection." % (url, local_path, url))

    actual_size = os.path.getsize(local_path)
    if size_in_bytes is not None and actual_size != size_in_bytes:
        raise exceptions.DataError("[%s] is corrupt. Downloaded [%d] bytes but [%d] bytes are expected." %
                                   (local_path, actual_size, size_in_bytes))

    return True
def with_actor_system(runnable, cfg):
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]" % str(already_running))
    try:
        actors = actor.bootstrap_actor_system(try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system", "remote.benchmarking.supported", already_running)
    except RuntimeError as e:
        logger.exception("Could not bootstrap actor system.")
        if str(e) == "Unable to determine valid external socket address.":
            console.warn("Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                         logger=logger)
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shut down the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            while not shutdown_complete and times_interrupted < 2:
                try:
                    logger.info("Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    while actor.actor_system_already_running() and timeout > 0:
                        logger.info("Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                    if timeout > 0:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning("Shutdown timed out. Actor system is still running.")
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning("User interrupted shutdown of internal actor system.")
                    console.info("Please wait a moment for Rally's internal components to shut down.")
            if not shutdown_complete and times_interrupted > 0:
                logger.warning("Terminating after user has interrupted actor system shutdown explicitly for [%d] times." %
                               times_interrupted)
                console.println("")
                console.warn("Terminating now at the risk of leaving child processes behind.")
                console.println("")
                console.warn("The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
            elif not shutdown_complete:
                console.warn("Could not terminate all internal processes within timeout. Please check and force-terminate "
                             "all Rally processes.")
def test_println_randomized_dockertrue_or_istty_and_isnotquiet(self, patched_print, patched_isatty):
    console.init()
    console.QUIET = False
    random_boolean = random.choice([True, False])
    patched_isatty.return_value = random_boolean
    console.RALLY_RUNNING_IN_DOCKER = not random_boolean
    console.println(msg="Unittest message")
    patched_print.assert_called_once_with("Unittest message", end="\n", flush=False)
def sweep(self):
    invocation_root = self.cfg.opts("system", "invocation.root.dir")
    track_name = self.cfg.opts("benchmarks", "track")
    challenge_name = self.cfg.opts("benchmarks", "challenge")
    car_name = self.cfg.opts("benchmarks", "car")

    log_root = paths.Paths(self.cfg).log_root()
    archive_path = "%s/logs-%s-%s-%s.zip" % (invocation_root, track_name, challenge_name, car_name)
    io.compress(log_root, archive_path)
    console.println("")
    console.info("Archiving logs in %s" % archive_path)
    shutil.rmtree(log_root)
def install(cfg):
    root_path = paths.install_root(cfg)
    car, plugins = load_team(cfg, external=False)

    # A non-empty distribution-version is provided
    distribution = bool(cfg.opts("mechanic", "distribution.version", mandatory=False))
    sources = not distribution
    build = not cfg.opts("mechanic", "skip.build")
    build_type = cfg.opts("mechanic", "build.type")
    ip = cfg.opts("mechanic", "network.host")
    http_port = int(cfg.opts("mechanic", "network.http.port"))
    node_name = cfg.opts("mechanic", "node.name")
    master_nodes = cfg.opts("mechanic", "master.nodes")
    seed_hosts = cfg.opts("mechanic", "seed.hosts")

    if build_type == "tar":
        binary_supplier = supplier.create(cfg, sources, distribution, build, car, plugins)
        p = provisioner.local(cfg=cfg, car=car, plugins=plugins, cluster_settings={}, ip=ip, http_port=http_port,
                              all_node_ips=seed_hosts, all_node_names=master_nodes, target_root=root_path,
                              node_name=node_name)
        node_config = p.prepare(binary=binary_supplier())
    elif build_type == "docker":
        if len(plugins) > 0:
            raise exceptions.SystemSetupError("You cannot specify any plugins for Docker clusters. Please remove "
                                              "\"--elasticsearch-plugins\" and try again.")
        p = provisioner.docker(cfg=cfg, car=car, cluster_settings={}, ip=ip, http_port=http_port,
                               target_root=root_path, node_name=node_name)
        # there is no binary for Docker that can be downloaded / built upfront
        node_config = p.prepare(binary=None)
    else:
        raise exceptions.SystemSetupError("Unknown build type [{}]".format(build_type))

    provisioner.save_node_configuration(root_path, node_config)
    console.println(json.dumps({"installation-id": cfg.opts("system", "install.id")}, indent=2), force=True)
def dispatch_sub_command(cfg, sub_command):
    try:
        if sub_command == "compare":
            reporter.compare(cfg)
        elif sub_command == "list":
            dispatch_list(cfg)
        elif sub_command == "download":
            mechanic.download(cfg)
        elif sub_command == "install":
            mechanic.install(cfg)
        elif sub_command == "start":
            mechanic.start(cfg)
        elif sub_command == "stop":
            mechanic.stop(cfg)
        elif sub_command == "race":
            race(cfg)
        elif sub_command == "generate":
            generate(cfg)
        elif sub_command == "create-track":
            tracker.create_track(cfg)
        elif sub_command == "info":
            track.track_info(cfg)
        else:
            raise exceptions.SystemSetupError("Unknown subcommand [%s]" % sub_command)
        return True
    except exceptions.RallyError as e:
        logging.getLogger(__name__).exception("Cannot run subcommand [%s].", sub_command)
        msg = str(e.message)
        nesting = 0
        while hasattr(e, "cause") and e.cause:
            nesting += 1
            e = e.cause
            if hasattr(e, "message"):
                msg += "\n%s%s" % ("\t" * nesting, e.message)
            else:
                msg += "\n%s%s" % ("\t" * nesting, str(e))
        console.error("Cannot %s. %s" % (sub_command, msg))
        console.println("")
        print_help_on_errors()
        return False
    except BaseException as e:
        logging.getLogger(__name__).exception("A fatal error occurred while running subcommand [%s].", sub_command)
        console.error("Cannot %s. %s." % (sub_command, e))
        console.println("")
        print_help_on_errors()
        return False
def after_lap(self, lap):
    if self.laps > 1:
        lap_time = self.lap_timer.split_time() - self.lap_times
        self.lap_times += lap_time
        hl, ml, sl = convert.seconds_to_hour_minute_seconds(lap_time)
        reporter.summarize(self.metrics_store, self.cfg, track=self.track, lap=lap)
        console.println("")
        if lap < self.laps:
            remaining = (self.laps - lap) * self.lap_times / lap
            hr, mr, sr = convert.seconds_to_hour_minute_seconds(remaining)
            console.info("Lap time %02d:%02d:%02d (ETA: %02d:%02d:%02d)" % (hl, ml, sl, hr, mr, sr), logger=logger)
        else:
            console.info("Lap time %02d:%02d:%02d" % (hl, ml, sl), logger=logger)
        console.println("")
def from_sources(remote_url, src_dir, revision, gradle, java_home, log_dir, plugins, src_config, build=True):
    if build:
        console.info("Preparing for race ...", end="", flush=True)
    try:
        revisions = extract_revisions(revision)
        es_src_dir = os.path.join(src_dir, config_value(src_config, "elasticsearch.src.subdir"))
        try:
            es_revision = revisions["elasticsearch"]
        except KeyError:
            raise exceptions.SystemSetupError("No revision specified for Elasticsearch in [%s]." % revision)
        SourceRepository("Elasticsearch", remote_url, es_src_dir).fetch(es_revision)

        # this may as well be a core plugin and we need to treat them specially. :plugins:analysis-icu:assemble
        for plugin in plugins:
            if not plugin.core_plugin:
                plugin_remote_url = config_value(src_config, "plugin.%s.remote.repo.url" % plugin.name)
                plugin_src_dir = os.path.join(src_dir, config_value(src_config, "plugin.%s.src.subdir" % plugin.name))
                try:
                    plugin_revision = revisions[plugin.name]
                except KeyError:
                    # maybe we can use the catch-all revision (only if it's not a git revision)
                    plugin_revision = revisions.get("all")
                    if not plugin_revision or SourceRepository.is_commit_hash(plugin_revision):
                        raise exceptions.SystemSetupError("No revision specified for plugin [%s] in [%s]." % (plugin.name, revision))
                    else:
                        logger.info("Revision for [%s] is not explicitly defined. Using catch-all revision [%s]." %
                                    (plugin.name, plugin_revision))
                SourceRepository(plugin.name, plugin_remote_url, plugin_src_dir).fetch(plugin_revision)

        if build:
            builder = Builder(es_src_dir, gradle, java_home, log_dir)
            builder.build([CLEAN_TASK, ASSEMBLE_TASK])
            for plugin in plugins:
                if plugin.core_plugin:
                    task = ":plugins:%s:assemble" % plugin.name
                else:
                    task = config_value(src_config, "plugin.%s.build.task" % plugin.name)
                builder.build([task])
            console.println(" [OK]")
        binaries = {"elasticsearch": resolve_es_binary(es_src_dir)}
        for plugin in plugins:
            if plugin.core_plugin:
                binaries[plugin.name] = resolve_core_plugin_binary(plugin.name, es_src_dir)
            else:
                binaries[plugin.name] = resolve_plugin_binary(plugin.name, src_dir, src_config)
        return binaries
    except BaseException:
        if build:
            console.println(" [FAILED]")
        raise
def after_lap(self, lap):
    if self.laps > 1:
        lap_time = self.lap_timer.split_time() - self.lap_times
        self.lap_times += lap_time
        hl, ml, sl = convert.seconds_to_hour_minute_seconds(lap_time)
        reporter.summarize(self.cfg, track=self.track, lap=lap)
        console.println("")
        if lap < self.laps:
            remaining = (self.laps - lap) * self.lap_times / lap
            hr, mr, sr = convert.seconds_to_hour_minute_seconds(remaining)
            console.info("Lap time %02d:%02d:%02d (ETA: %02d:%02d:%02d)" % (hl, ml, sl, hr, mr, sr), logger=logger)
        else:
            console.info("Lap time %02d:%02d:%02d" % (hl, ml, sl), logger=logger)
        console.println("")
def __init__(self, track, params, **kwargs):
    super().__init__(track, params, **kwargs)
    self.request_params = params.get("request-params", {})
    self.index_definitions = []
    if track.indices:
        filter_idx = params.get("index")
        if isinstance(filter_idx, str):
            filter_idx = [filter_idx]
        settings = params.get("settings")
        for idx in track.indices:
            if not filter_idx or idx.name in filter_idx:
                body = idx.body
                if body and settings:
                    if "settings" in body:
                        # merge (and potentially override)
                        body["settings"].update(settings)
                    else:
                        body["settings"] = settings
                elif not body:
                    # this is just needed because we will output this in the middle of the benchmark and will thus write
                    # this on the same line as the progress message.
                    console.println("")
                    console.warn("Creating index %s based on deprecated type mappings. Please specify an index body instead. "
                                 "For details please see the migration guide in the docs." % idx.name, logger=logger)
                    # TODO #366: Deprecate this syntax. We should only specify all mappings in the body property.
                    # check all types and merge their mappings
                    body = {"mappings": {}}
                    if settings:
                        body["settings"] = settings
                    for t in idx.types:
                        body["mappings"].update(t.mapping)
                self.index_definitions.append((idx.name, body))
    else:
        try:
            # only 'index' is mandatory, the body is optional (may be ok to create an index without a body)
            idx = params["index"]
            body = params.get("body")
            if isinstance(idx, str):
                idx = [idx]
            for i in idx:
                self.index_definitions.append((i, body))
        except KeyError:
            raise exceptions.InvalidSyntax("Please set the property 'index' for the create-index operation")
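# A small, self-contained illustration (an assumption for clarity, not part of the original code)
# of the settings-merge behavior above: explicit "settings" from params are merged into an index
# body defined in the track and may override individual keys.
body = {"settings": {"index.number_of_shards": 5, "index.number_of_replicas": 1}}
settings = {"index.number_of_replicas": 0}
if "settings" in body:
    # merge (and potentially override)
    body["settings"].update(settings)
else:
    body["settings"] = settings
print(body)  # {'settings': {'index.number_of_shards': 5, 'index.number_of_replicas': 0}}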
def from_distribution(version, repo_name, distribution_config, distributions_root, plugins):
    if version.strip() == "":
        raise exceptions.SystemSetupError("Could not determine version. Please specify the Elasticsearch distribution "
                                          "to download with the command line parameter --distribution-version. "
                                          "E.g. --distribution-version=5.0.0")
    io.ensure_dir(distributions_root)
    distribution_path = "%s/elasticsearch-%s.tar.gz" % (distributions_root, version)

    repo = DistributionRepository(repo_name, distribution_config, version)
    download_url = repo.download_url
    logger.info("Resolved download URL [%s] for version [%s]" % (download_url, version))
    if not os.path.isfile(distribution_path) or not repo.cache:
        try:
            logger.info("Starting download of Elasticsearch [%s]" % version)
            progress = net.Progress("[INFO] Downloading Elasticsearch %s" % version)
            net.download(download_url, distribution_path, progress_indicator=progress)
            progress.finish()
            logger.info("Successfully downloaded Elasticsearch [%s]." % version)
        except urllib.error.HTTPError:
            console.println("[FAILED]")
            logging.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, download_url))
            raise exceptions.SystemSetupError("Cannot download Elasticsearch distribution from [%s]. Please check that the "
                                              "specified version [%s] is correct." % (download_url, version))
    else:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, distribution_path))

    binaries = {"elasticsearch": distribution_path}
    for plugin in plugins:
        # if we have multiple plugin configurations for a plugin we will override entries here but as this is always the same
        # key-value pair this is ok.
        plugin_url = repo.plugin_download_url(plugin.name)
        if plugin_url:
            binaries[plugin.name] = plugin_url
    return binaries
def from_sources(remote_url, src_dir, revision, gradle, java_home, log_dir, build=True):
    if build:
        console.info("Preparing for race ...", end="", flush=True)
    try:
        SourceRepository(remote_url, src_dir).fetch(revision)

        builder = Builder(src_dir, gradle, java_home, log_dir)
        if build:
            builder.build()
            console.println(" [OK]")
        return {"elasticsearch": builder.binary}
    except BaseException:
        if build:
            console.println(" [FAILED]")
        raise
def from_sources(cfg, build=True):
    if build:
        console.info("Preparing for race ...", end="", flush=True)
    try:
        builder = Builder(cfg)
        SourceRepository(cfg).fetch()

        if build:
            builder.build()
        builder.add_binary_to_config()
        if build:
            console.println(" [OK]")
    except BaseException:
        if build:
            console.println(" [FAILED]")
        raise
def after_lap(self, lap):
    if self.laps > 1:
        lap_time = self.lap_timer.split_time() - self.lap_times
        self.lap_times += lap_time
        hl, ml, sl = convert.seconds_to_hour_minute_seconds(lap_time)
        console.println("")
        if lap + 1 < self.laps:
            remaining = (self.laps - lap - 1) * self.lap_times / (lap + 1)
            hr, mr, sr = convert.seconds_to_hour_minute_seconds(remaining)
            console.info("Lap time %02d:%02d:%02d (ETA: %02d:%02d:%02d)" % (hl, ml, sl, hr, mr, sr), logger=logger)
        else:
            console.info("Lap time %02d:%02d:%02d" % (hl, ml, sl), logger=logger)
        console.println("")
def after_lap(self, lap):
    logger.info("Finished lap [%d/%d]" % (lap, self.race.total_laps))
    if self.race.total_laps > 1:
        lap_time = self.lap_timer.split_time() - self.lap_times
        self.lap_times += lap_time
        hl, ml, sl = convert.seconds_to_hour_minute_seconds(lap_time)
        lap_results = reporter.calculate_results(self.metrics_store, self.race, lap)
        self.race.add_lap_results(lap_results)
        reporter.summarize(self.race, self.cfg, lap=lap)
        console.println("")
        if lap < self.race.total_laps:
            remaining = (self.race.total_laps - lap) * self.lap_times / lap
            hr, mr, sr = convert.seconds_to_hour_minute_seconds(remaining)
            console.info("Lap time %02d:%02d:%02d (ETA: %02d:%02d:%02d)" % (hl, ml, sl, hr, mr, sr), logger=logger)
        else:
            console.info("Lap time %02d:%02d:%02d" % (hl, ml, sl), logger=logger)
        console.println("")
def create_track(cfg):
    logger = logging.getLogger(__name__)

    track_name = cfg.opts("track", "track.name")
    indices = cfg.opts("generator", "indices")
    root_path = cfg.opts("generator", "output.path")
    target_hosts = cfg.opts("client", "hosts")
    client_options = cfg.opts("client", "options")

    logger.info("Creating track [%s] matching indices [%s]", track_name, indices)

    client = EsClientFactory(hosts=target_hosts.all_hosts[opts.TargetHosts.DEFAULT],
                             client_options=client_options.all_client_options[opts.TargetHosts.DEFAULT]).create()

    info = client.info()
    console.info(f"Connected to Elasticsearch cluster [{info['name']}] version [{info['version']['number']}].\n", logger=logger)

    output_path = os.path.abspath(os.path.join(io.normalize_path(root_path), track_name))
    io.ensure_dir(output_path)

    indices, corpora = extract_mappings_and_corpora(client, output_path, indices)
    if len(indices) == 0:
        raise RuntimeError("Failed to extract any indices for track!")

    template_vars = {
        "track_name": track_name,
        "indices": indices,
        "corpora": corpora
    }

    track_path = os.path.join(output_path, "track.json")
    templates_path = os.path.join(cfg.opts("node", "rally.root"), "resources")
    process_template(templates_path, "track.json.j2", template_vars, track_path)

    console.println("")
    console.info(f"Track {track_name} has been created. Run it with: {PROGRAM_NAME} --track-path={output_path}")
def after_lap(self):
    logger.info("Finished lap [%d/%d]" % (self.current_lap, self.race.total_laps))
    if self.race.total_laps > 1:
        lap_time = self.lap_timer.split_time() - self.lap_times
        self.lap_times += lap_time
        hl, ml, sl = convert.seconds_to_hour_minute_seconds(lap_time)
        lap_results = reporter.calculate_results(self.metrics_store, self.race, self.current_lap)
        self.race.add_lap_results(lap_results)
        reporter.summarize(self.race, self.cfg, lap=self.current_lap)
        console.println("")
        if self.current_lap < self.race.total_laps:
            remaining = (self.race.total_laps - self.current_lap) * self.lap_times / self.current_lap
            hr, mr, sr = convert.seconds_to_hour_minute_seconds(remaining)
            console.info("Lap time %02d:%02d:%02d (ETA: %02d:%02d:%02d)" % (hl, ml, sl, hr, mr, sr), logger=logger)
        else:
            console.info("Lap time %02d:%02d:%02d" % (hl, ml, sl), logger=logger)
        console.println("")
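# A worked example (illustrative values, not from the original code) of the ETA arithmetic
# used by the after_lap variants above: the estimate is simply
# (remaining laps) * (average lap time so far), where lap_times is the accumulated lap time.
total_laps = 5
current_lap = 2
lap_times = 250.0  # total seconds spent in the first two laps
remaining = (total_laps - current_lap) * lap_times / current_lap
print(remaining)  # 375.0 seconds: 3 more laps at an average of 125 seconds each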
def list_races(cfg):
    races = []
    for race in race_store(cfg).list():
        races.append([time.to_iso8601(race.trial_timestamp), race.track, race.challenge, race.car, race.user_tag])

    if len(races) > 0:
        console.println("\nRecent races:\n")
        console.println(tabulate.tabulate(races, headers=["Race Timestamp", "Track", "Challenge", "Car", "User Tag"]))
    else:
        console.println("")
        console.println("No recent races found.")
def from_distribution(version, repo_name, distributions_root):
    if version.strip() == "":
        raise exceptions.SystemSetupError("Could not determine version. Please specify the Elasticsearch distribution "
                                          "to download with the command line parameter --distribution-version. "
                                          "E.g. --distribution-version=5.0.0")
    io.ensure_dir(distributions_root)
    distribution_path = "%s/elasticsearch-%s.tar.gz" % (distributions_root, version)

    try:
        repo = distribution_repos[repo_name]
    except KeyError:
        raise exceptions.SystemSetupError("Unknown distribution repository [%s]. Valid values are: [%s]"
                                          % (repo_name, ",".join(distribution_repos.keys())))

    download_url = repo.download_url(version)
    logger.info("Resolved download URL [%s] for version [%s]" % (download_url, version))
    if not os.path.isfile(distribution_path) or repo.must_download:
        try:
            logger.info("Starting download of Elasticsearch [%s]" % version)
            progress = net.Progress("[INFO] Downloading Elasticsearch %s" % version)
            net.download(download_url, distribution_path, progress_indicator=progress)
            progress.finish()
            logger.info("Successfully downloaded Elasticsearch [%s]." % version)
        except urllib.error.HTTPError:
            console.println("[FAILED]")
            logging.exception("Cannot download Elasticsearch distribution for version [%s] from [%s]." % (version, download_url))
            raise exceptions.SystemSetupError("Cannot download Elasticsearch distribution from [%s]. Please check that the "
                                              "specified version [%s] is correct." % (download_url, version))
    else:
        logger.info("Skipping download for version [%s]. Found an existing binary locally at [%s]." % (version, distribution_path))
    return distribution_path
def decompress_corpus(archive_path, documents_path, uncompressed_size):
    if uncompressed_size:
        console.info("Decompressing track data from [%s] to [%s] (resulting size: %.2f GB) ... " %
                     (archive_path, documents_path, convert.bytes_to_gb(uncompressed_size)),
                     end='', flush=True, logger=logger)
    else:
        console.info("Decompressing track data from [%s] to [%s] ... " % (archive_path, documents_path),
                     end='', flush=True, logger=logger)

    io.decompress(archive_path, io.dirname(archive_path))
    console.println("[OK]")
    if not os.path.isfile(documents_path):
        raise exceptions.DataError("Decompressing [%s] did not create [%s]. Please check with the track author if the compressed "
                                   "archive has been created correctly." % (archive_path, documents_path))

    extracted_bytes = os.path.getsize(documents_path)
    if uncompressed_size is not None and extracted_bytes != uncompressed_size:
        raise exceptions.DataError("[%s] is corrupt. Extracted [%d] bytes but [%d] bytes are expected." %
                                   (documents_path, extracted_bytes, uncompressed_size))
def list_plugins(cfg):
    plugins = PluginLoader(team_repo(cfg)).plugins()
    if plugins:
        console.println("Available Elasticsearch plugins:\n")
        console.println(tabulate.tabulate([[p.name, p.config] for p in plugins], headers=["Name", "Configuration"]))
    else:
        console.println("No Elasticsearch plugins are available.\n")
def on_start_engine(self, msg, sender):
    logger.info("Received signal from race control to start engine.")
    self.race_control = sender
    self.cfg = msg.cfg
    self.metrics_store = metrics.InMemoryMetricsStore(self.cfg)
    self.metrics_store.open(ctx=msg.open_metrics_context)

    # In our startup procedure we first create all mechanics. Only if this succeeds we'll continue.
    mechanics_and_start_message = []

    hosts = self.cfg.opts("client", "hosts")
    if len(hosts) == 0:
        raise exceptions.LaunchError("No target hosts are configured.")

    if msg.external:
        logger.info("Cluster will not be provisioned by Rally.")
        # just create one actor for this special case and run it on the coordinator node (i.e. here)
        m = self.createActor(NodeMechanicActor,
                             # globalName="/rally/mechanic/worker/external",
                             targetActorRequirements={"coordinator": True})
        self.children.append(m)
        mechanics_and_start_message.append((m, msg.for_nodes(ip=hosts)))
    else:
        logger.info("Cluster consisting of %s will be provisioned by Rally." % hosts)
        all_ips_and_ports = to_ip_port(hosts)
        all_node_ips = extract_all_node_ips(all_ips_and_ports)
        for ip_port, nodes in nodes_by_host(all_ips_and_ports).items():
            ip, port = ip_port
            if ip == "127.0.0.1":
                m = self.createActor(NodeMechanicActor,
                                     # globalName="/rally/mechanic/worker/localhost",
                                     targetActorRequirements={"coordinator": True})
                self.children.append(m)
                mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
            else:
                if self.cfg.opts("system", "remote.benchmarking.supported"):
                    logger.info("Benchmarking against %s with external Rally daemon." % hosts)
                else:
                    logger.error("User tried to benchmark against %s but no external Rally daemon has been started." % hosts)
                    raise exceptions.SystemSetupError("To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                                      "on each machine including this one." % ip)
                already_running = actor.actor_system_already_running(ip=ip)
                logger.info("Actor system on [%s] already running? [%s]" % (ip, str(already_running)))
                if not already_running:
                    console.println("Waiting for Rally daemon on [%s] " % ip, end="", flush=True)
                while not actor.actor_system_already_running(ip=ip):
                    console.println(".", end="", flush=True)
                    time.sleep(3)
                if not already_running:
                    console.println(" [OK]")
                m = self.createActor(NodeMechanicActor,
                                     # globalName="/rally/mechanic/worker/%s" % ip,
                                     targetActorRequirements={"ip": ip})
                mechanics_and_start_message.append((m, msg.for_nodes(all_node_ips, ip, port, nodes)))
                self.children.append(m)
    self.status = "starting"
    self.received_responses = []
    for mechanic_actor, start_message in mechanics_and_start_message:
        self.send(mechanic_actor, start_message)
def list_telemetry(cfg):
    console.println("Available telemetry devices:\n")
    console.println(tabulate.tabulate(Telemetry(cfg, devices=[
        JitCompiler(cfg, None),
        Gc(cfg, None),
        FlightRecorder(cfg, None),
        PerfStat(cfg, None)
    ]).list(), ["Command", "Name", "Description"]))
    console.println("\nKeep in mind that each telemetry device may incur a runtime overhead which can skew results.")
def benchmark_only(cfg):
    console.println(BOGUS_RESULTS_WARNING)
    set_default_hosts(cfg)
    # We'll use a special car name for external benchmarks.
    cfg.add(config.Scope.benchmark, "mechanic", "car.names", ["external"])
    return race(cfg, external=True)
def _try_init(self):
    if not git.is_working_copy(self.src_dir):
        console.println("Downloading sources for %s from %s to %s." % (self.name, self.remote_url, self.src_dir))
        git.clone(self.src_dir, self.remote_url)
def print_help_on_errors():
    heading = "Getting further help:"
    console.println(console.format.bold(heading))
    console.println(console.format.underline_for(heading))
    console.println("* Check the log files in %s for errors." % application_log_dir_path())
    console.println("* Read the documentation at %s" % console.format.link(DOC_LINK))
    console.println("* Ask a question in the forum at %s" % console.format.link("https://discuss.elastic.co/c/elasticsearch/rally"))
    console.println("* Raise an issue at %s and include the log files in %s." %
                    (console.format.link("https://github.com/elastic/rally/issues"), application_log_dir_path()))
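# A minimal sketch (assumptions for illustration, not the actual console.format helpers used
# above) of what bold/underline_for/link could look like: ANSI escape codes for emphasis and
# links, and a dashed rule whose length matches the heading it underlines.
def bold_sketch(message):
    return "\033[1m%s\033[0m" % message

def underline_for_sketch(message):
    return "-" * len(message)

def link_sketch(url):
    return "\033[4m%s\033[0m" % url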
def list_cars():
    console.println("Available cars:\n")
    console.println(tabulate.tabulate([[c.name] for c in cars], headers=["Name"]))
def list_tracks(cfg):
    console.println("Available tracks:\n")
    console.println(tabulate.tabulate(
        tabular_data=[[t.name, t.short_description, ",".join(map(str, t.challenges))] for t in tracks(cfg)],
        headers=["Name", "Description", "Challenges"]))
def main():
    check_python_version()

    start = time.time()

    # Early init of console output so we start to show everything consistently.
    console.init(quiet=False)
    # allow to see a thread-dump on SIGQUIT
    faulthandler.register(signal.SIGQUIT, file=sys.stderr)

    pre_configure_logging()
    args = parse_args()

    console.init(quiet=args.quiet)
    console.println(BANNER)

    cfg = config.Config(config_name=args.configuration_name)
    sub_command = derive_sub_command(args, cfg)
    ensure_configuration_present(cfg, args, sub_command)

    if args.effective_start_date:
        cfg.add(config.Scope.application, "system", "time.start", args.effective_start_date)
        cfg.add(config.Scope.application, "system", "time.start.user_provided", True)
    else:
        cfg.add(config.Scope.application, "system", "time.start", datetime.datetime.utcnow())
        cfg.add(config.Scope.application, "system", "time.start.user_provided", False)

    cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)

    # per node?
    cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)
    cfg.add(config.Scope.applicationOverride, "system", "logging.output", args.logging)

    # Local config per node
    cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
    cfg.add(config.Scope.application, "node", "rally.cwd", os.getcwd())

    cfg.add(config.Scope.applicationOverride, "mechanic", "source.revision", args.revision)
    # TODO dm: Consider renaming this one. It's used by different modules
    if args.distribution_version:
        cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.version", args.distribution_version)
    cfg.add(config.Scope.applicationOverride, "mechanic", "distribution.repository", args.distribution_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "repository.name", args.team_repository)
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.names", csv_to_list(args.car))
    cfg.add(config.Scope.applicationOverride, "mechanic", "car.plugins", csv_to_list(args.elasticsearch_plugins))
    cfg.add(config.Scope.applicationOverride, "mechanic", "node.datapaths", csv_to_list(args.data_paths))
    if args.keep_cluster_running:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", True)
        # force-preserve the cluster nodes.
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", True)
    else:
        cfg.add(config.Scope.applicationOverride, "mechanic", "keep.running", False)
        cfg.add(config.Scope.applicationOverride, "mechanic", "preserve.install", convert.to_bool(args.preserve_install))
    cfg.add(config.Scope.applicationOverride, "mechanic", "telemetry.devices", csv_to_list(args.telemetry))

    cfg.add(config.Scope.applicationOverride, "race", "pipeline", args.pipeline)
    cfg.add(config.Scope.applicationOverride, "race", "laps", args.laps)
    cfg.add(config.Scope.applicationOverride, "race", "user.tag", args.user_tag)

    cfg.add(config.Scope.applicationOverride, "track", "repository.name", args.track_repository)
    cfg.add(config.Scope.applicationOverride, "track", "track.name", args.track)
    cfg.add(config.Scope.applicationOverride, "track", "challenge.name", args.challenge)
    cfg.add(config.Scope.applicationOverride, "track", "include.tasks", csv_to_list(args.include_tasks))
    cfg.add(config.Scope.applicationOverride, "track", "test.mode.enabled", args.test_mode)
    cfg.add(config.Scope.applicationOverride, "track", "auto_manage_indices", to_bool(args.auto_manage_indices))

    cfg.add(config.Scope.applicationOverride, "reporting", "format", args.report_format)
    cfg.add(config.Scope.applicationOverride, "reporting", "output.path", args.report_file)
    if sub_command == "compare":
        cfg.add(config.Scope.applicationOverride, "reporting", "baseline.timestamp", args.baseline)
        cfg.add(config.Scope.applicationOverride, "reporting", "contender.timestamp", args.contender)

    ################################
    # new section name: driver
    ################################
    cfg.add(config.Scope.applicationOverride, "driver", "cluster.health", args.cluster_health)
    cfg.add(config.Scope.applicationOverride, "driver", "profiling", args.enable_driver_profiling)
    cfg.add(config.Scope.applicationOverride, "driver", "load_driver_hosts", csv_to_list(args.load_driver_hosts))
    if sub_command != "list":
        # Also needed by mechanic (-> telemetry) - duplicate by module?
        cfg.add(config.Scope.applicationOverride, "client", "hosts", _normalize_hosts(csv_to_list(args.target_hosts)))
        client_options = kv_to_map(csv_to_list(args.client_options))
        cfg.add(config.Scope.applicationOverride, "client", "options", client_options)
        if "timeout" not in client_options:
            console.info("You did not provide an explicit timeout in the client options. Assuming default of 10 seconds.")

    # split by component?
    if sub_command == "list":
        cfg.add(config.Scope.applicationOverride, "system", "list.config.option", args.configuration)
        cfg.add(config.Scope.applicationOverride, "system", "list.races.max_results", args.limit)

    configure_logging(cfg)
    logger.info("OS [%s]" % str(os.uname()))
    logger.info("Python [%s]" % str(sys.implementation))
    logger.info("Rally version [%s]" % version.version())
    logger.info("Command line arguments: %s" % args)
    # Configure networking
    net.init()
    if not args.offline:
        if not net.has_internet_connection():
            console.warn("No Internet connection detected. Automatic download of track data sets etc. is disabled.", logger=logger)
            cfg.add(config.Scope.applicationOverride, "system", "offline.mode", True)
        else:
            logger.info("Detected a working Internet connection.")

    # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
    # noinspection PyBroadException
    try:
        process.kill_running_rally_instances()
    except BaseException:
        logger.exception("Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")

    success = dispatch_sub_command(cfg, sub_command)

    end = time.time()
    if success:
        console.println("")
        console.info("SUCCESS (took %d seconds)" % (end - start), overline="-", underline="-")
    else:
        console.println("")
        console.info("FAILURE (took %d seconds)" % (end - start), overline="-", underline="-")
        sys.exit(64)
def list_pipelines():
    console.println("Available pipelines:\n")
    console.println(tabulate.tabulate(available_pipelines(), headers=["Name", "Description"]))
def print_internal(message):
    console.println(message, logger=logger.info)