def remaining_nodes():
    """Return the IDs of the nodes that should be shut down.

    Reads `provider`, `config`, `keep_min_workers` and `workers_only`,
    none of which are defined in this block (presumably they come from
    the enclosing scope).

    When `keep_min_workers` is set, `config["min_workers"]` randomly chosen
    workers are left out of the result. When `workers_only` is set, the
    head node is left out as well.

    Returns:
        list: the selected worker node IDs, preceded by the head node IDs
        unless `workers_only` is set.
    """
    workers = provider.non_terminated_nodes(
        {TAG_RAY_NODE_TYPE: NODE_TYPE_WORKER})

    if keep_min_workers:
        min_workers = config.get("min_workers", 0)

        cli_logger.print(
            "{} random worker nodes will not be shut down. " +
            cf.gray("(due to {})"), cf.bold(min_workers),
            cf.bold("--keep-min-workers"))
        cli_logger.old_info(logger, "teardown_cluster: Keeping {} nodes...",
                            min_workers)

        # Clamp at zero: when fewer than `min_workers` workers are running
        # the difference is negative and random.sample raises ValueError.
        # In that case every running worker is kept.
        shutdown_count = max(len(workers) - min_workers, 0)
        workers = random.sample(workers, shutdown_count)

    # todo: it's weird to kill the head node but not all workers
    if workers_only:
        cli_logger.print(
            "The head node will not be shut down. " +
            cf.gray("(due to {})"), cf.bold("--workers-only"))

        return workers

    head = provider.non_terminated_nodes(
        {TAG_RAY_NODE_TYPE: NODE_TYPE_HEAD})

    return head + workers
def terminate_node(self, node_id):
    """Stop or terminate a single node and drop its cached tags.

    With `self.cache_stopped_nodes` enabled, the node is stopped rather
    than terminated, except for spot instances, which are always
    terminated. With it disabled, the node is terminated.

    Args:
        node_id: ID of the node, as accepted by `self._get_cached_node`.
    """
    node = self._get_cached_node(node_id)

    if not self.cache_stopped_nodes:
        node.terminate()
    elif node.spot_instance_request_id:
        cli_logger.print(
            "Terminating instance {} " +
            cf.gray("(cannot stop spot instances, only terminate)"),
            node_id)  # todo: show node name?

        cli_logger.old_info(
            logger,
            "AWSNodeProvider: terminating node {} (spot nodes cannot "
            "be stopped, only terminated)", node_id)
        node.terminate()
    else:
        cli_logger.print(
            "Stopping instance {} " + cf.gray(
                "(to terminate instead, "
                "set `cache_stopped_nodes: False` "
                "under `provider` in the cluster configuration)"),
            node_id)  # todo: show node name?

        # Pass node_id as a logger argument, like every other call here,
        # instead of pre-formatting the message with str.format.
        cli_logger.old_info(
            logger,
            "AWSNodeProvider: stopping node {}. To terminate nodes "
            "on stop, set 'cache_stopped_nodes: False' in the "
            "provider config.", node_id)
        node.stop()

    # Forget any cached or pending tags for the node.
    self.tag_cache.pop(node_id, None)
    self.tag_cache_pending.pop(node_id, None)
def terminate_nodes(self, node_ids):
    """Stop or terminate a batch of nodes and drop their cached tags.

    Does nothing for an empty `node_ids`. With `self.cache_stopped_nodes`
    enabled, nodes with a spot instance request are terminated and all
    others are stopped. With it disabled, every node is terminated.

    Args:
        node_ids (list): IDs of the nodes to shut down.
    """
    if not node_ids:
        return

    if not self.cache_stopped_nodes:
        self.ec2.meta.client.terminate_instances(InstanceIds=node_ids)
    else:
        spot_nodes = []
        stoppable_nodes = []
        for candidate in node_ids:
            cached = self._get_cached_node(candidate)
            if cached.spot_instance_request_id:
                spot_nodes.append(candidate)
            else:
                stoppable_nodes.append(candidate)

        if stoppable_nodes:
            # todo: show node names?
            cli_logger.print(
                "Stopping instances {} " + cf.gray(
                    "(to terminate instead, "
                    "set `cache_stopped_nodes: False` "
                    "under `provider` in the cluster configuration)"),
                cli_logger.render_list(stoppable_nodes))
            cli_logger.old_info(
                logger,
                "AWSNodeProvider: stopping nodes {}. To terminate nodes "
                "on stop, set 'cache_stopped_nodes: False' in the "
                "provider config.", stoppable_nodes)

            self.ec2.meta.client.stop_instances(InstanceIds=stoppable_nodes)

        if spot_nodes:
            cli_logger.print(
                "Terminating instances {} " +
                cf.gray("(cannot stop spot instances, only terminate)"),
                cli_logger.render_list(spot_nodes))
            cli_logger.old_info(
                logger,
                "AWSNodeProvider: terminating nodes {} (spot nodes cannot "
                "be stopped, only terminated)", spot_nodes)

            self.ec2.meta.client.terminate_instances(InstanceIds=spot_nodes)

    # Forget any cached or pending tags for the nodes.
    for gone in node_ids:
        self.tag_cache.pop(gone, None)
        self.tag_cache_pending.pop(gone, None)
def _format_msg(msg, *args, _tags=None, _numbered=None, _no_format=None, **kwargs): if isinstance(msg, str) or isinstance(msg, ColorfulString): tags_str = "" if _tags is not None: tags_list = [] for k, v in _tags.items(): if v is True: tags_list += [k] continue if v is False: continue tags_list += [k + "=" + v] if tags_list: tags_str = cf.reset( cf.gray(" [{}]".format(", ".join(tags_list)))) numbering_str = "" if _numbered is not None: chars, i, n = _numbered i = str(i) n = str(n) numbering_str = cf.gray(chars[0] + i + "/" + n + chars[1]) + " " if _no_format: # todo: throw if given args/kwargs? return numbering_str + msg + tags_str return numbering_str + msg.format(*args, **kwargs) + tags_str if kwargs: raise ValueError("We do not support printing kwargs yet.") res = [msg, *args] res = [str(x) for x in res] return ", ".join(res)
def write_stdout(level, message):
    """Write `message` to the console behind a colored level label.

    The label is the INFO one unless `level` is 'WARNING', in which case
    the WARNING label is used and the message is colored yellow.
    Continuation lines of a multi-line message are indented past the
    label and preceded by a gray vertical bar.

    Args:
        level: level name; only 'WARNING' is treated specially.
        message: text to print, possibly containing newlines.
    """
    label = colorful.bold_yellow(u'\u229b INFO :')
    if level == 'WARNING':
        label = colorful.bold_red(u'\u2757 WARNING :')
        message = colorful.yellow(message)

    # Separator placed between lines so that continuation lines line up
    # under the first one.
    continuation = u'\n\t\t{}\t{} '.format(
        colorful.gray(u'\u2502'), ' ' * len(label))

    lines = message.split('\n')
    body = continuation.join(lines)
    console_write(u'\t\t\u251c\u2501\t{} {}'.format(label, body))
def wait_ready(self, deadline):
    """Wait until a remote shell is available on the node.

    Runs `uptime` on the host (outside of the container) through
    `self.cmd_runner` until it succeeds. Attempts continue while
    `time.time()` is before `deadline` and the provider does not report
    the node as terminated. Each failed attempt is followed by a sleep of
    `READY_CHECK_INTERVAL` seconds.

    Args:
        deadline (float): `time.time()` timestamp after which no further
            attempt is started.

    Returns:
        bool: `True` once `uptime` succeeds.

    Raises:
        AssertionError: if no attempt succeeded before the loop ended.
    """
    with cli_logger.group(
            "Waiting for SSH to become available", _numbered=("[]", 1, 6)):
        with LogTimer(self.log_prefix + "Got remote shell"):
            cli_logger.old_info(logger, "{}Waiting for remote shell...",
                                self.log_prefix)

            cli_logger.print("Running `{}` as a test.", cf.bold("uptime"))
            first_conn_refused_time = None
            while time.time() < deadline and \
                    not self.provider.is_terminated(self.node_id):
                try:
                    cli_logger.old_debug(logger,
                                         "{}Waiting for remote shell...",
                                         self.log_prefix)

                    # Run outside of the container
                    self.cmd_runner.run("uptime", run_env="host")
                    cli_logger.old_debug(logger, "Uptime succeeded.")
                    cli_logger.success("Success.")
                    return True
                except ProcessRunnerError as e:
                    # NOTE(review): handle_ssh_fails presumably tracks when
                    # connection refusals started; confirm in
                    # cmd_output_util.
                    first_conn_refused_time = \
                        cmd_output_util.handle_ssh_fails(
                            e, first_conn_refused_time,
                            retry_interval=READY_CHECK_INTERVAL)
                    time.sleep(READY_CHECK_INTERVAL)
                except Exception as e:
                    # TODO(maximsmol): we should not be ignoring
                    # exceptions if they get filtered properly
                    # (new style log + non-interactive shells)
                    #
                    # however threading this configuration state
                    # is a pain and I'm leaving it for later

                    retry_str = str(e)
                    if hasattr(e, "cmd"):
                        retry_str = "(Exit Status {}): {}".format(
                            e.returncode, " ".join(e.cmd))

                    cli_logger.print(
                        "SSH still not available {}, "
                        "retrying in {} seconds.", cf.gray(retry_str),
                        cf.bold(str(READY_CHECK_INTERVAL)))
                    cli_logger.old_debug(logger,
                                         "{}Node not up, retrying: {}",
                                         self.log_prefix, retry_str)
                    time.sleep(READY_CHECK_INTERVAL)

    # Raised explicitly rather than via `assert False`: assert statements
    # are removed under `python -O`, which would turn a timeout into a
    # silent `None` return.
    raise AssertionError("Unable to connect to node")
def uncertain_num_to_str(x, dx, use_color=True):
    """Render a value with its uncertainty in concise notation.

    `dx` is rounded to one significant digit and `x` is rounded to the
    same decimal position; the uncertainty follows the value in
    parentheses, e.g. `1.50(2)` for 1.5 +/- 0.02 and `1230(20)` for
    1234.5 +/- 20.

    Error is always added at the end.

    Args:
        x: the value.
        dx: the uncertainty of `x`; must be positive.
        use_color (bool): if true, the uncertainty part is wrapped in
            `colorful.gray`.

    Returns:
        The value string followed by the uncertainty in parentheses.

    Raises:
        ValueError: if `dx` is not a positive number.
    """
    # `not dx > 0` also rejects NaN, which log10/floor cannot handle.
    if not dx > 0:
        raise ValueError(
            "dx must be a positive number, got {!r}".format(dx))

    exponent = floor(log10(dx))
    dx = round(dx, -exponent)
    # re-round in case dx 0.096 -> 0.1
    exponent = floor(log10(dx))

    if exponent < 0:
        decimals = -exponent
        # Fixed-point formatting keeps trailing zeros, so the last digit
        # of the value lines up with the uncertainty digit.
        x_str = "{:.{}f}".format(x, decimals)
        dx_str = "(" + str(round(dx * 10 ** decimals)) + ")"
    else:
        # The uncertainty is at least one unit: show both numbers as
        # integers so no spurious ".0" suggests extra precision.
        x_str = str(int(round(x, -exponent)))
        dx_str = "(" + str(int(round(dx, -exponent))) + ")"

    if use_color:
        return x_str + colorful.gray(dx_str)
    return x_str + dx_str
def run_cmd_redirected(cmd, silent=False, use_login_shells=False):
    """Run a command and optionally redirect output to a file.

    Output is discarded when `silent` is set and verbose logging is off.
    Otherwise it goes to `sys.stdout` unless `is_output_redirected()` is
    true, in which case stdout and stderr are written to a file in the
    temporary directory.

    Args:
        cmd (List[str]): Command to run.
        silent (bool): If true, the command output will be silenced
            completely (redirected to /dev/null), unless verbose logging
            is enabled. Use this for running utility commands like rsync.
        use_login_shells (bool): Passed through unchanged to
            `_run_and_process_output`.

    Returns:
        Whatever `_run_and_process_output` returns.
    """
    if silent and cli_logger.verbosity < 1:
        return _run_and_process_output(
            cmd, stdout_file=None, use_login_shells=use_login_shells)

    if not is_output_redirected():
        return _run_and_process_output(
            cmd, stdout_file=sys.stdout, use_login_shells=use_login_shells)

    # Use only the executable's base name: a full path such as
    # "/usr/bin/rsync" would put directory separators into the file name
    # and make `open` fail on a nonexistent directory.
    cmd_name = os.path.basename(cmd[0])
    tmpfile_path = os.path.join(
        tempfile.gettempdir(),
        "ray-up-{}-{}.txt".format(cmd_name, time.time()))
    with open(
            tmpfile_path,
            mode="w",
            # line buffering
            buffering=1) as tmp:
        cli_logger.verbose("Command stdout is redirected to {}",
                           cf.bold(tmp.name))
        cli_logger.verbose(
            cf.gray("Use --dump-command-output to "
                    "dump to terminal instead."))

        return _run_and_process_output(
            cmd,
            stdout_file=tmp,
            stderr_file=tmp,
            use_login_shells=use_login_shells)
def wait_ready(self, deadline):
    """Wait until a remote shell is available on the node.

    Runs `uptime` through `self.cmd_runner` until it succeeds. Attempts
    continue while `time.time()` is before `deadline` and the provider
    does not report the node as terminated. Each failed attempt is
    followed by a sleep of `READY_CHECK_INTERVAL` seconds.

    Args:
        deadline (float): `time.time()` timestamp after which no further
            attempt is started.

    Returns:
        bool: `True` once `uptime` succeeds.

    Raises:
        AssertionError: if no attempt succeeded before the loop ended.
    """
    with cli_logger.group(
            "Waiting for SSH to become available", _numbered=("[]", 1, 6)):
        with LogTimer(self.log_prefix + "Got remote shell"):
            cli_logger.old_info(logger, "{}Waiting for remote shell...",
                                self.log_prefix)

            cli_logger.print("Running `{}` as a test.", cf.bold("uptime"))
            while time.time() < deadline and \
                    not self.provider.is_terminated(self.node_id):
                try:
                    cli_logger.old_debug(logger,
                                         "{}Waiting for remote shell...",
                                         self.log_prefix)

                    self.cmd_runner.run("uptime")
                    cli_logger.old_debug(logger, "Uptime succeeded.")
                    cli_logger.success("Success.")
                    return True
                except Exception as e:
                    # Any failure is treated as "not ready yet"; errors
                    # that carry a `cmd` attribute are summarised by exit
                    # status and command line.
                    retry_str = str(e)
                    if hasattr(e, "cmd"):
                        retry_str = "(Exit Status {}): {}".format(
                            e.returncode, " ".join(e.cmd))

                    cli_logger.print(
                        "SSH still not available {}, "
                        "retrying in {} seconds.", cf.gray(retry_str),
                        cf.bold(str(READY_CHECK_INTERVAL)))
                    cli_logger.old_debug(logger,
                                         "{}Node not up, retrying: {}",
                                         self.log_prefix, retry_str)
                    time.sleep(READY_CHECK_INTERVAL)

    # Raised explicitly rather than via `assert False`: assert statements
    # are removed under `python -O`, which would turn a timeout into a
    # silent `None` return.
    raise AssertionError("Unable to connect to node")
def confirm(self, yes, msg, *args, _abort=False, _default=False, **kwargs):
    """Display a confirmation dialog.

    Valid answers are "y/yes/true/1" and "n/no/false/0".

    Args:
        yes (bool): If `yes` is `True` the dialog will default to "yes"
            and continue without waiting for user input.
        msg: The prompt, rendered through `_format_msg` with `*args`
            and `**kwargs`.
        _abort (bool): If `_abort` is `True`, "no" means aborting the
            program.
        _default (bool): The default action to take if the user just
            presses enter with no input. Also used when input ends (EOF)
            or the prompt is interrupted with Ctrl-C.

    Returns:
        `True` or `False` for the chosen answer, or `None` when
        `self.old_style` is set (no dialog is shown in that case).

    Raises:
        SilentClickException: if the answer is negative and `_abort`
            is `True`.
    """
    if self.old_style:
        return

    should_abort = _abort
    default = _default

    if default:
        yn_str = cf.green("Y") + "/" + cf.red("n")
    else:
        yn_str = cf.green("y") + "/" + cf.red("N")

    confirm_str = cf.underlined("Confirm [" + yn_str + "]:") + " "

    rendered_message = _format_msg(msg, *args, **kwargs)
    if rendered_message and rendered_message[-1] != "\n":
        rendered_message += " "

    # Width of the last prompt line, used to align follow-up prompts.
    msg_len = len(rendered_message.split("\n")[-1])
    complete_str = rendered_message + confirm_str

    if yes:
        self._print(complete_str + "y " +
                    cf.gray("[automatic, due to --yes]"))
        return True

    self._print(complete_str, linefeed=False)

    res = None
    yes_answers = ["y", "yes", "true", "1"]
    no_answers = ["n", "no", "false", "0"]
    try:
        while True:
            ans = sys.stdin.readline()
            if not ans:
                # EOF: readline() returns "" forever from here on, so
                # re-prompting would loop endlessly. Fall back to the
                # default, as Ctrl-C does.
                self.newline()
                res = default
                break

            ans = ans.lower()

            if ans == "\n":
                res = default
                break

            ans = ans.strip()
            if ans in yes_answers:
                res = True
                break
            if ans in no_answers:
                res = False
                break

            indent = " " * msg_len
            self.error("{}Invalid answer: {}. "
                       "Expected {} or {}", indent, cf.bold(ans.strip()),
                       self.render_list(yes_answers, "/"),
                       self.render_list(no_answers, "/"))
            self._print(indent + confirm_str, linefeed=False)
    except KeyboardInterrupt:
        self.newline()
        res = default

    if not res and should_abort:
        # todo: make sure we tell the user if they
        # need to do cleanup
        self._print("Exiting...")
        raise SilentClickException(
            "Exiting due to the response to confirm(should_abort=True).")

    return res
def _format_msg(msg, *args, _tags=None, _numbered=None, _no_format=None, **kwargs): """Formats a message for printing. Renders `msg` using the built-in `str.format` and the passed-in `*args` and `**kwargs`. Args: *args (Any): `.format` arguments for `msg`. _tags (Dict[str, Any]): key-value pairs to display at the end of the message in square brackets. If a tag is set to `True`, it is printed without the value, the presence of the tag treated as a "flag". E.g. `_format_msg("hello", _tags=dict(from=mom, signed=True))` `hello [from=Mom, signed]` _numbered (Tuple[str, int, int]): `(brackets, i, n)` The `brackets` string is composed of two "bracket" characters, `i` is the index, `n` is the total. The string `{i}/{n}` surrounded by the "brackets" is prepended to the message. This is used to number steps in a procedure, with different brackets specifying different major tasks. E.g. `_format_msg("hello", _numbered=("[]", 0, 5))` `[0/5] hello` _no_format (bool): If `_no_format` is `True`, `.format` will not be called on the message. Useful if the output is user-provided or may otherwise contain an unexpected formatting string (e.g. "{}"). Returns: The formatted message. """ if isinstance(msg, str) or isinstance(msg, ColorfulString): tags_str = "" if _tags is not None: tags_list = [] for k, v in _tags.items(): if v is True: tags_list += [k] continue if v is False: continue tags_list += [k + "=" + v] if tags_list: tags_str = cf.reset( cf.gray(" [{}]".format(", ".join(tags_list)))) numbering_str = "" if _numbered is not None: chars, i, n = _numbered i = str(i) n = str(n) numbering_str = cf.gray(chars[0] + i + "/" + n + chars[1]) + " " if _no_format: # todo: throw if given args/kwargs? return numbering_str + msg + tags_str return numbering_str + msg.format(*args, **kwargs) + tags_str if kwargs: raise ValueError("We do not support printing kwargs yet.") res = [msg, *args] res = [str(x) for x in res] return ", ".join(res)
def confirm(self, yes, msg, *args, _abort=False, _default=False, **kwargs):
    """Ask the user a yes/no question and return the answer.

    Accepted answers are "y/yes/true/1" and "n/no/false/0", in any case.
    An invalid answer prints an error and prompts again.

    Args:
        yes (bool): If true, "yes" is answered automatically without
            reading any input.
        msg: The prompt, rendered through `_format_msg` with `*args`
            and `**kwargs`.
        _abort (bool): If true, a negative answer aborts by raising
            `SilentClickException`.
        _default (bool): Answer used when the user just presses enter,
            when input ends (EOF), or when the prompt is interrupted
            with Ctrl-C.

    Returns:
        `True` or `False` for the chosen answer, or `None` when
        `self.old_style` is set (no dialog is shown in that case).

    Raises:
        SilentClickException: if the answer is negative and `_abort`
            is true.
    """
    if self.old_style:
        return

    should_abort = _abort
    default = _default

    if default:
        yn_str = cf.green("Y") + "/" + cf.red("n")
    else:
        yn_str = cf.green("y") + "/" + cf.red("N")

    confirm_str = cf.underlined("Confirm [" + yn_str + "]:") + " "

    rendered_message = _format_msg(msg, *args, **kwargs)
    if rendered_message and rendered_message[-1] != "\n":
        rendered_message += " "

    # Width of the last prompt line, used to align follow-up prompts.
    msg_len = len(rendered_message.split("\n")[-1])
    complete_str = rendered_message + confirm_str

    if yes:
        self._print(complete_str + "y " +
                    cf.gray("[automatic, due to --yes]"))
        return True

    self._print(complete_str, linefeed=False)

    res = None
    yes_answers = ["y", "yes", "true", "1"]
    no_answers = ["n", "no", "false", "0"]
    try:
        while True:
            ans = sys.stdin.readline()
            if not ans:
                # EOF: readline() returns "" forever from here on, so
                # re-prompting would loop endlessly. Fall back to the
                # default, as Ctrl-C does.
                self.newline()
                res = default
                break

            ans = ans.lower()

            if ans == "\n":
                res = default
                break

            ans = ans.strip()
            if ans in yes_answers:
                res = True
                break
            if ans in no_answers:
                res = False
                break

            indent = " " * msg_len
            self.error("{}Invalid answer: {}. "
                       "Expected {} or {}", indent, cf.bold(ans.strip()),
                       self.render_list(yes_answers, "/"),
                       self.render_list(no_answers, "/"))
            self._print(indent + confirm_str, linefeed=False)
    except KeyboardInterrupt:
        self.newline()
        res = default

    if not res and should_abort:
        # todo: make sure we tell the user if they
        # need to do cleanup
        self._print("Exiting...")
        raise SilentClickException(
            "Exiting due to the response to confirm(should_abort=True).")

    return res